1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
41 #include "toplev.h"
43 /* Need to include rtl.h, expr.h, etc. for optabs. */
44 #include "expr.h"
45 #include "optabs.h"
47 /* Return the smallest scalar part of STMT.
48 This is used to determine the vectype of the stmt. We generally set the
49 vectype according to the type of the result (lhs). For stmts whose
50 result-type is different than the type of the arguments (e.g., demotion,
51 promotion), vectype will be reset appropriately (later). Note that we have
52 to visit the smallest datatype in this function, because that determines the
53 VF. If the smallest datatype in the loop is present only as the rhs of a
54 promotion operation - we'd miss it.
55 Such a case, where a variable of this datatype does not appear in the lhs
56 anywhere in the loop, can only occur if it's an invariant: e.g.:
57 'int_x = (int) short_inv', which we'd expect to have been optimized away by
58 invariant motion. However, we cannot rely on invariant motion to always
59 take invariants out of the loop, and so in the case of promotion we also
60 have to check the rhs.
61 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
62 types. */
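/* For example (illustration only, not part of the original sources): in a
   loop such as

     short *s; int *d;
     for (i = 0; i < N; i++)
       d[i] = (int) s[i];

   the smallest scalar type is the 2-byte 'short', which appears only on the
   rhs of the promotion; it is this type, and not the 4-byte 'int' of the
   lhs, that determines the vectorization factor.  */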
64 tree
65 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
66 HOST_WIDE_INT *rhs_size_unit)
68 tree scalar_type = gimple_expr_type (stmt);
69 HOST_WIDE_INT lhs, rhs;
71 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
73 if (is_gimple_assign (stmt)
74 && (gimple_assign_cast_p (stmt)
75 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
76 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
78 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
80 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
81 if (rhs < lhs)
82 scalar_type = rhs_type;
85 *lhs_size_unit = lhs;
86 *rhs_size_unit = rhs;
87 return scalar_type;
91 /* Find the place of the data-ref in STMT in the interleaving chain that starts
92 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
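/* For example (illustration only): for the interleaved accesses
   a[4*i], a[4*i+1], a[4*i+2] and a[4*i+3], with FIRST_STMT being the access
   to a[4*i], the place of the access to a[4*i+2] in the chain is 2; an
   access that does not belong to the chain yields -1.  */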
94 int
95 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
97 gimple next_stmt = first_stmt;
98 int result = 0;
100 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
101 return -1;
103 while (next_stmt && next_stmt != stmt)
105 result++;
106 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
109 if (next_stmt)
110 return result;
111 else
112 return -1;
116 /* Function vect_insert_into_interleaving_chain.
118 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
120 static void
121 vect_insert_into_interleaving_chain (struct data_reference *dra,
122 struct data_reference *drb)
124 gimple prev, next;
125 tree next_init;
126 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
127 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
129 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
130 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
131 while (next)
133 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
134 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
136 /* Insert here. */
137 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
138 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
139 return;
141 prev = next;
142 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
145 /* We got to the end of the list. Insert here. */
146 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
147 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
151 /* Function vect_update_interleaving_chain.
153      For two data-refs DRA and DRB that are a part of a chain of interleaved data
154 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
156 There are four possible cases:
157 1. New stmts - both DRA and DRB are not a part of any chain:
158 FIRST_DR = DRB
159 NEXT_DR (DRB) = DRA
160 2. DRB is a part of a chain and DRA is not:
161 no need to update FIRST_DR
162 no need to insert DRB
163 insert DRA according to init
164 3. DRA is a part of a chain and DRB is not:
165 if (init of FIRST_DR > init of DRB)
166 FIRST_DR = DRB
167 NEXT(FIRST_DR) = previous FIRST_DR
168 else
169 insert DRB according to its init
170 4. both DRA and DRB are in some interleaving chains:
171 choose the chain with the smallest init of FIRST_DR
172 insert the nodes of the second chain into the first one. */
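/* Worked example (illustration only): if DRA's chain starts at init 8 and
   DRB's chain starts at init 0 (case 4 above), the chain starting at the
   smaller init (DRB's) is kept, the nodes of DRA's chain are merged into it
   in increasing order of their inits, and each merged statement is updated
   to point to the new FIRST_DR.  */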
174 static void
175 vect_update_interleaving_chain (struct data_reference *drb,
176 struct data_reference *dra)
178 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
179 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
180 tree next_init, init_dra_chain, init_drb_chain;
181 gimple first_a, first_b;
182 tree node_init;
183 gimple node, prev, next, first_stmt;
185 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
186 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
188 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
189 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
190 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
191 return;
194 /* 2. DRB is a part of a chain and DRA is not. */
195 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
197 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
198 /* Insert DRA into the chain of DRB. */
199 vect_insert_into_interleaving_chain (dra, drb);
200 return;
203 /* 3. DRA is a part of a chain and DRB is not. */
204 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
206 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
207 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
208 old_first_stmt)));
209 gimple tmp;
211 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
213 /* DRB's init is smaller than the init of the stmt previously marked
214 as the first stmt of the interleaving chain of DRA. Therefore, we
215 update FIRST_STMT and put DRB in the head of the list. */
216 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
217 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
219 /* Update all the stmts in the list to point to the new FIRST_STMT. */
220 tmp = old_first_stmt;
221 while (tmp)
223 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
224 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
227 else
229 /* Insert DRB in the list of DRA. */
230 vect_insert_into_interleaving_chain (drb, dra);
231 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
233 return;
236 /* 4. both DRA and DRB are in some interleaving chains. */
237 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
238 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
239 if (first_a == first_b)
240 return;
241 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
242 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
244 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
246 /* Insert the nodes of DRA chain into the DRB chain.
247 After inserting a node, continue from this node of the DRB chain (don't
248      start from the beginning). */
249 node = DR_GROUP_FIRST_DR (stmtinfo_a);
250 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
251 first_stmt = first_b;
253 else
255 /* Insert the nodes of DRB chain into the DRA chain.
256 After inserting a node, continue from this node of the DRA chain (don't
257      start from the beginning). */
258 node = DR_GROUP_FIRST_DR (stmtinfo_b);
259 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
260 first_stmt = first_a;
263 while (node)
265 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
266 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
267 while (next)
269 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
270 if (tree_int_cst_compare (next_init, node_init) > 0)
272 /* Insert here. */
273 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
274 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
275 prev = node;
276 break;
278 prev = next;
279 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
281 if (!next)
283 /* We got to the end of the list. Insert here. */
284 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
285 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
286 prev = node;
288 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
289 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
294 /* Function vect_equal_offsets.
296 Check if OFFSET1 and OFFSET2 are identical expressions. */
298 static bool
299 vect_equal_offsets (tree offset1, tree offset2)
301 bool res;
303 STRIP_NOPS (offset1);
304 STRIP_NOPS (offset2);
306 if (offset1 == offset2)
307 return true;
309 if (TREE_CODE (offset1) != TREE_CODE (offset2)
310 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
311 return false;
313 res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
314 TREE_OPERAND (offset2, 0));
316 if (!res || !BINARY_CLASS_P (offset1))
317 return res;
319 res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
320 TREE_OPERAND (offset2, 1));
322 return res;
326 /* Check dependence between DRA and DRB for basic block vectorization.
327 If the accesses share same bases and offsets, we can compare their initial
328 constant offsets to decide whether they differ or not. In case of a read-
329 write dependence we check that the load is before the store to ensure that
330 vectorization will not change the order of the accesses. */
332 static bool
333 vect_drs_dependent_in_basic_block (struct data_reference *dra,
334 struct data_reference *drb)
336 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
337 gimple earlier_stmt;
339 /* We only call this function for pairs of loads and stores, but we verify
340 it here. */
341 if (DR_IS_READ (dra) == DR_IS_READ (drb))
343 if (DR_IS_READ (dra))
344 return false;
345 else
346 return true;
349   /* Check that the data-refs have the same bases and offsets. If not, we can't
350 determine if they are dependent. */
351 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
352 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
353 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
354 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
355               != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
356 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
357 return true;
359 /* Check the types. */
360 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
361 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
363 if (type_size_a != type_size_b
364 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
365 TREE_TYPE (DR_REF (drb))))
366 return true;
368 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
369 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
371 /* Two different locations - no dependence. */
372 if (init_a != init_b)
373 return false;
375 /* We have a read-write dependence. Check that the load is before the store.
376 When we vectorize basic blocks, vector load can be only before
377 corresponding scalar load, and vector store can be only after its
378      corresponding scalar store. So the order of the accesses is preserved in
379 case the load is before the store. */
380 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
381 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
382 return false;
384 return true;
388 /* Function vect_check_interleaving.
390 Check if DRA and DRB are a part of interleaving. In case they are, insert
391 DRA and DRB in an interleaving chain. */
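/* For example (illustration only): in

     for (i = 0; i < N; i++)
       {
         a[2*i]   = x;
         a[2*i+1] = y;
       }

   the two stores share the same base, offset and step; their inits differ
   by one element size, which is a multiple of the type size and smaller
   than the step, so they are recorded as an interleaving group of size 2.  */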
393 static bool
394 vect_check_interleaving (struct data_reference *dra,
395 struct data_reference *drb)
397 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
399   /* Check that the data-refs have the same first location (except init) and they
400 are both either store or load (not load and store). */
401 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
402 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
403 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
404 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
405               != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
406 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
407 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
408 || DR_IS_READ (dra) != DR_IS_READ (drb))
409 return false;
411 /* Check:
412 1. data-refs are of the same type
413 2. their steps are equal
414 3. the step (if greater than zero) is greater than the difference between
415 data-refs' inits. */
416 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
417 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
419 if (type_size_a != type_size_b
420 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
421 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
422 TREE_TYPE (DR_REF (drb))))
423 return false;
425 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
426 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
427 step = TREE_INT_CST_LOW (DR_STEP (dra));
429 if (init_a > init_b)
431 /* If init_a == init_b + the size of the type * k, we have an interleaving,
432 and DRB is accessed before DRA. */
433 diff_mod_size = (init_a - init_b) % type_size_a;
435 if (step && (init_a - init_b) > step)
436 return false;
438 if (diff_mod_size == 0)
440 vect_update_interleaving_chain (drb, dra);
441 if (vect_print_dump_info (REPORT_DR_DETAILS))
443 fprintf (vect_dump, "Detected interleaving ");
444 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
445 fprintf (vect_dump, " and ");
446 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
448 return true;
451 else
453 /* If init_b == init_a + the size of the type * k, we have an
454 interleaving, and DRA is accessed before DRB. */
455 diff_mod_size = (init_b - init_a) % type_size_a;
457 if (step && (init_b - init_a) > step)
458 return false;
460 if (diff_mod_size == 0)
462 vect_update_interleaving_chain (dra, drb);
463 if (vect_print_dump_info (REPORT_DR_DETAILS))
465 fprintf (vect_dump, "Detected interleaving ");
466 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
467 fprintf (vect_dump, " and ");
468 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
470 return true;
474 return false;
477 /* Check if the data references pointed to by DR_I and DR_J are the same or
478    belong to the same interleaving group.  Return FALSE if the drs are
479    different, otherwise return TRUE. */
481 static bool
482 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
484 gimple stmt_i = DR_STMT (dr_i);
485 gimple stmt_j = DR_STMT (dr_j);
487 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
488 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
489 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
490 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
491 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
492 return true;
493 else
494 return false;
497 /* If address ranges represented by DDR_I and DDR_J are equal,
498 return TRUE, otherwise return FALSE. */
500 static bool
501 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
503 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
504 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
505 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
506 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
507 return true;
508 else
509 return false;
512 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
513 tested at run-time. Return TRUE if DDR was successfully inserted.
514 Return false if versioning is not supported. */
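/* For example (illustration only): for

     for (i = 0; i < N; i++)
       p[i] = q[i] + 1;

   where p and q are pointer parameters that may alias, the ddr is pushed
   onto LOOP_VINFO_MAY_ALIAS_DDRS and the vectorized loop is later guarded
   by a run-time check that the two accessed ranges do not overlap, falling
   back to the scalar version otherwise.  */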
516 static bool
517 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
519 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
521 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
522 return false;
524 if (vect_print_dump_info (REPORT_DR_DETAILS))
526 fprintf (vect_dump, "mark for run-time aliasing test between ");
527 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
528 fprintf (vect_dump, " and ");
529 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
532 if (optimize_loop_nest_for_size_p (loop))
534 if (vect_print_dump_info (REPORT_DR_DETAILS))
535 fprintf (vect_dump, "versioning not supported when optimizing for size.");
536 return false;
539 /* FORNOW: We don't support versioning with outer-loop vectorization. */
540 if (loop->inner)
542 if (vect_print_dump_info (REPORT_DR_DETAILS))
543 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
544 return false;
547 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
548 return true;
552 /* Function vect_analyze_data_ref_dependence.
554 Return TRUE if there (might) exist a dependence between a memory-reference
555    DRA and a memory-reference DRB.  When versioning for alias can check the
556    dependence at run-time, return FALSE.  Adjust *MAX_VF according to
557 the data dependence. */
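/* For example (illustration only): in

     for (i = 0; i < N; i++)
       a[i+3] = a[i] + 1;

   the dependence distance is 3.  Since abs (3) >= 2, *MAX_VF is reduced to
   3, and because the distance is then >= *MAX_VF the dependence does not
   prevent vectorization with a vectorization factor of at most 3.  */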
559 static bool
560 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
561 loop_vec_info loop_vinfo, int *max_vf,
562 bool *data_dependence_in_bb)
564 unsigned int i;
565 struct loop *loop = NULL;
566 struct data_reference *dra = DDR_A (ddr);
567 struct data_reference *drb = DDR_B (ddr);
568 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
569 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
570 lambda_vector dist_v;
571 unsigned int loop_depth;
573 /* Don't bother to analyze statements marked as unvectorizable. */
574 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
575 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
576 return false;
578 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
580 /* Independent data accesses. */
581 vect_check_interleaving (dra, drb);
582 return false;
585 if (loop_vinfo)
586 loop = LOOP_VINFO_LOOP (loop_vinfo);
588 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
589 return false;
591 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
593 if (loop_vinfo)
595 if (vect_print_dump_info (REPORT_DR_DETAILS))
597 fprintf (vect_dump, "versioning for alias required: "
598 "can't determine dependence between ");
599 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
600 fprintf (vect_dump, " and ");
601 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
604 /* Add to list of ddrs that need to be tested at run-time. */
605 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
608       /* When vectorizing a basic block, an unknown dependence can still mean
609 strided access. */
610 if (vect_check_interleaving (dra, drb))
611 return false;
613 if (vect_print_dump_info (REPORT_DR_DETAILS))
615 fprintf (vect_dump, "can't determine dependence between ");
616 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
617 fprintf (vect_dump, " and ");
618 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
621 /* We do not vectorize basic blocks with write-write dependencies. */
622 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
623 return true;
625 /* We deal with read-write dependencies in basic blocks later (by
626 verifying that all the loads in the basic block are before all the
627 stores). */
628 *data_dependence_in_bb = true;
629 return false;
632 /* Versioning for alias is not yet supported for basic block SLP, and
633      dependence distance is inapplicable, hence, in case of known data
634 dependence, basic block vectorization is impossible for now. */
635 if (!loop_vinfo)
637 if (dra != drb && vect_check_interleaving (dra, drb))
638 return false;
640 if (vect_print_dump_info (REPORT_DR_DETAILS))
642 fprintf (vect_dump, "determined dependence between ");
643 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
644 fprintf (vect_dump, " and ");
645 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
648       /* Do not vectorize basic blocks with write-write dependences. */
649 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
650 return true;
652 /* Check if this dependence is allowed in basic block vectorization. */
653 return vect_drs_dependent_in_basic_block (dra, drb);
656 /* Loop-based vectorization and known data dependence. */
657 if (DDR_NUM_DIST_VECTS (ddr) == 0)
659 if (vect_print_dump_info (REPORT_DR_DETAILS))
661 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
662 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
663 fprintf (vect_dump, " and ");
664 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
666 /* Add to list of ddrs that need to be tested at run-time. */
667 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
670 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
671 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
673 int dist = dist_v[loop_depth];
675 if (vect_print_dump_info (REPORT_DR_DETAILS))
676 fprintf (vect_dump, "dependence distance = %d.", dist);
678 if (dist == 0)
680 if (vect_print_dump_info (REPORT_DR_DETAILS))
682 fprintf (vect_dump, "dependence distance == 0 between ");
683 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
684 fprintf (vect_dump, " and ");
685 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
688 /* For interleaving, mark that there is a read-write dependency if
689              necessary.  We have already checked that one of the data-refs is a store. */
690 if (DR_IS_READ (dra))
691 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
692 else
694 if (DR_IS_READ (drb))
695 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
698 continue;
701 if (dist > 0 && DDR_REVERSED_P (ddr))
703 /* If DDR_REVERSED_P the order of the data-refs in DDR was
704 reversed (to make distance vector positive), and the actual
705 distance is negative. */
706 if (vect_print_dump_info (REPORT_DR_DETAILS))
707 fprintf (vect_dump, "dependence distance negative.");
708 continue;
711 if (abs (dist) >= 2
712 && abs (dist) < *max_vf)
714 /* The dependence distance requires reduction of the maximal
715 vectorization factor. */
716 *max_vf = abs (dist);
717 if (vect_print_dump_info (REPORT_DR_DETAILS))
718 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
719 *max_vf);
722 if (abs (dist) >= *max_vf)
724 /* Dependence distance does not create dependence, as far as
725 vectorization is concerned, in this case. */
726 if (vect_print_dump_info (REPORT_DR_DETAILS))
727 fprintf (vect_dump, "dependence distance >= VF.");
728 continue;
731 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
733 fprintf (vect_dump, "not vectorized, possible dependence "
734 "between data-refs ");
735 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
736 fprintf (vect_dump, " and ");
737 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
740 return true;
743 return false;
746 /* Function vect_analyze_data_ref_dependences.
748 Examine all the data references in the loop, and make sure there do not
749 exist any data dependences between them. Set *MAX_VF according to
750 the maximum vectorization factor the data dependences allow. */
752 bool
753 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
754 bb_vec_info bb_vinfo, int *max_vf,
755 bool *data_dependence_in_bb)
757 unsigned int i;
758 VEC (ddr_p, heap) *ddrs = NULL;
759 struct data_dependence_relation *ddr;
761 if (vect_print_dump_info (REPORT_DETAILS))
762 fprintf (vect_dump, "=== vect_analyze_dependences ===");
764 if (loop_vinfo)
765 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
766 else
767 ddrs = BB_VINFO_DDRS (bb_vinfo);
769 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
770 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
771 data_dependence_in_bb))
772 return false;
774 return true;
778 /* Function vect_compute_data_ref_alignment
780 Compute the misalignment of the data reference DR.
782 Output:
783 1. If during the misalignment computation it is found that the data reference
784 cannot be vectorized then false is returned.
785 2. DR_MISALIGNMENT (DR) is defined.
787 FOR NOW: No analysis is actually performed. Misalignment is calculated
788 only for trivial cases. TODO. */
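/* For example (illustration only): with a 16-byte vector type and

     float a[N] __attribute__((aligned (16)));
     ... = a[i+1];

   the access has DR_INIT == 4 and a 16-byte aligned base, so
   DR_MISALIGNMENT is set to 4 % 16 == 4 bytes; when nothing is known about
   the base alignment, DR_MISALIGNMENT stays -1 (unknown).  */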
790 static bool
791 vect_compute_data_ref_alignment (struct data_reference *dr)
793 gimple stmt = DR_STMT (dr);
794 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
795 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
796 struct loop *loop = NULL;
797 tree ref = DR_REF (dr);
798 tree vectype;
799 tree base, base_addr;
800 bool base_aligned;
801 tree misalign;
802 tree aligned_to, alignment;
804 if (vect_print_dump_info (REPORT_DETAILS))
805 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
807 if (loop_vinfo)
808 loop = LOOP_VINFO_LOOP (loop_vinfo);
810 /* Initialize misalignment to unknown. */
811 SET_DR_MISALIGNMENT (dr, -1);
813 misalign = DR_INIT (dr);
814 aligned_to = DR_ALIGNED_TO (dr);
815 base_addr = DR_BASE_ADDRESS (dr);
816 vectype = STMT_VINFO_VECTYPE (stmt_info);
818 /* In case the dataref is in an inner-loop of the loop that is being
819 vectorized (LOOP), we use the base and misalignment information
820 relative to the outer-loop (LOOP). This is ok only if the misalignment
821 stays the same throughout the execution of the inner-loop, which is why
822      we have to check that the stride of the dataref in the inner-loop is
823      divisible by the vector size. */
824 if (loop && nested_in_vect_loop_p (loop, stmt))
826 tree step = DR_STEP (dr);
827 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
829 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
831 if (vect_print_dump_info (REPORT_ALIGNMENT))
832 fprintf (vect_dump, "inner step divides the vector-size.");
833 misalign = STMT_VINFO_DR_INIT (stmt_info);
834 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
835 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
837 else
839 if (vect_print_dump_info (REPORT_ALIGNMENT))
840 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
841 misalign = NULL_TREE;
845 base = build_fold_indirect_ref (base_addr);
846   alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);
848 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
849 || !misalign)
851 if (vect_print_dump_info (REPORT_ALIGNMENT))
853 fprintf (vect_dump, "Unknown alignment for access: ");
854 print_generic_expr (vect_dump, base, TDF_SLIM);
856 return true;
859 if ((DECL_P (base)
860 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
861 alignment) >= 0)
862 || (TREE_CODE (base_addr) == SSA_NAME
863 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
864 TREE_TYPE (base_addr)))),
865 alignment) >= 0))
866 base_aligned = true;
867 else
868 base_aligned = false;
870 if (!base_aligned)
872 /* Do not change the alignment of global variables if
873 flag_section_anchors is enabled. */
874 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
875 || (TREE_STATIC (base) && flag_section_anchors))
877 if (vect_print_dump_info (REPORT_DETAILS))
879 fprintf (vect_dump, "can't force alignment of ref: ");
880 print_generic_expr (vect_dump, ref, TDF_SLIM);
882 return true;
885 /* Force the alignment of the decl.
886 NOTE: This is the only change to the code we make during
887 the analysis phase, before deciding to vectorize the loop. */
888 if (vect_print_dump_info (REPORT_DETAILS))
890 fprintf (vect_dump, "force alignment of ");
891 print_generic_expr (vect_dump, ref, TDF_SLIM);
894 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
895 DECL_USER_ALIGN (base) = 1;
898 /* At this point we assume that the base is aligned. */
899 gcc_assert (base_aligned
900 || (TREE_CODE (base) == VAR_DECL
901 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
903 /* If this is a backward running DR then first access in the larger
904 vectype actually is N-1 elements before the address in the DR.
905 Adjust misalign accordingly. */
906 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
908 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
909 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
910 otherwise we wouldn't be here. */
911 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
912 /* PLUS because DR_STEP was negative. */
913 misalign = size_binop (PLUS_EXPR, misalign, offset);
916 /* Modulo alignment. */
917 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
919 if (!host_integerp (misalign, 1))
921 /* Negative or overflowed misalignment value. */
922 if (vect_print_dump_info (REPORT_DETAILS))
923 fprintf (vect_dump, "unexpected misalign value");
924 return false;
927 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
929 if (vect_print_dump_info (REPORT_DETAILS))
931 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
932 print_generic_expr (vect_dump, ref, TDF_SLIM);
935 return true;
939 /* Function vect_compute_data_refs_alignment
941 Compute the misalignment of data references in the loop.
942 Return FALSE if a data reference is found that cannot be vectorized. */
944 static bool
945 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
946 bb_vec_info bb_vinfo)
948 VEC (data_reference_p, heap) *datarefs;
949 struct data_reference *dr;
950 unsigned int i;
952 if (loop_vinfo)
953 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
954 else
955 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
957 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
958 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
959 && !vect_compute_data_ref_alignment (dr))
961 if (bb_vinfo)
963 /* Mark unsupported statement as unvectorizable. */
964 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
965 continue;
967 else
968 return false;
971 return true;
975 /* Function vect_update_misalignment_for_peel
977 DR - the data reference whose misalignment is to be adjusted.
978 DR_PEEL - the data reference whose misalignment is being made
979 zero in the vector loop by the peel.
980 NPEEL - the number of iterations in the peel loop if the misalignment
981 of DR_PEEL is known at compile time. */
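/* For example (illustration only): with a 16-byte vector type, 4-byte
   elements and NPEEL == 3, a data-ref with DR_MISALIGNMENT == 4 and the
   same positive step as DR_PEEL gets

     (4 + 3 * 4) & 15 == 0

   i.e. it becomes aligned by the peel, while a data-ref whose misalignment
   is unknown at compile time keeps DR_MISALIGNMENT == -1.  */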
983 static void
984 vect_update_misalignment_for_peel (struct data_reference *dr,
985 struct data_reference *dr_peel, int npeel)
987 unsigned int i;
988 VEC(dr_p,heap) *same_align_drs;
989 struct data_reference *current_dr;
990 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
991 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
992 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
993 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
995 /* For interleaved data accesses the step in the loop must be multiplied by
996 the size of the interleaving group. */
997 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
998 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
999 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1000 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1002 /* It can be assumed that the data refs with the same alignment as dr_peel
1003 are aligned in the vector loop. */
1004 same_align_drs
1005 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1006 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1008 if (current_dr != dr)
1009 continue;
1010 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1011 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1012 SET_DR_MISALIGNMENT (dr, 0);
1013 return;
1016 if (known_alignment_for_access_p (dr)
1017 && known_alignment_for_access_p (dr_peel))
1019 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1020 int misal = DR_MISALIGNMENT (dr);
1021 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1022 misal += negative ? -npeel * dr_size : npeel * dr_size;
1023 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1024 SET_DR_MISALIGNMENT (dr, misal);
1025 return;
1028 if (vect_print_dump_info (REPORT_DETAILS))
1029 fprintf (vect_dump, "Setting misalignment to -1.");
1030 SET_DR_MISALIGNMENT (dr, -1);
1034 /* Function vect_verify_datarefs_alignment
1036 Return TRUE if all data references in the loop can be
1037 handled with respect to alignment. */
1039 bool
1040 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1042 VEC (data_reference_p, heap) *datarefs;
1043 struct data_reference *dr;
1044 enum dr_alignment_support supportable_dr_alignment;
1045 unsigned int i;
1047 if (loop_vinfo)
1048 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1049 else
1050 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1052 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1054 gimple stmt = DR_STMT (dr);
1055 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1057 /* For interleaving, only the alignment of the first access matters.
1058 Skip statements marked as not vectorizable. */
1059 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1060 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1061 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1062 continue;
1064 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1065 if (!supportable_dr_alignment)
1067 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1069 if (DR_IS_READ (dr))
1070 fprintf (vect_dump,
1071 "not vectorized: unsupported unaligned load.");
1072 else
1073 fprintf (vect_dump,
1074 "not vectorized: unsupported unaligned store.");
1076 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1078 return false;
1080 if (supportable_dr_alignment != dr_aligned
1081 && vect_print_dump_info (REPORT_ALIGNMENT))
1082 fprintf (vect_dump, "Vectorizing an unaligned access.");
1084 return true;
1088 /* Function vector_alignment_reachable_p
1090 Return true if vector alignment for DR is reachable by peeling
1091 a few loop iterations. Return false otherwise. */
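/* For example (illustration only): with a 16-byte vector of four 4-byte
   elements and DR_MISALIGNMENT == 8, peeling 4 - 2 == 2 scalar iterations
   makes the access aligned, so alignment is reachable; for an interleaved
   access with group size 3 those 2 prolog iterations are not a multiple of
   3, so peeling cannot align it and false is returned.  */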
1093 static bool
1094 vector_alignment_reachable_p (struct data_reference *dr)
1096 gimple stmt = DR_STMT (dr);
1097 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1098 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1100 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1102 /* For interleaved access we peel only if number of iterations in
1103          the prolog loop ({VF - misalignment}) is a multiple of the
1104 number of the interleaved accesses. */
1105 int elem_size, mis_in_elements;
1106 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1108 /* FORNOW: handle only known alignment. */
1109 if (!known_alignment_for_access_p (dr))
1110 return false;
1112 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1113 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1115 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1116 return false;
1119   /* If misalignment is known at compile time then allow peeling
1120 only if natural alignment is reachable through peeling. */
1121 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1123 HOST_WIDE_INT elmsize =
1124 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1125 if (vect_print_dump_info (REPORT_DETAILS))
1127 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1128 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1130 if (DR_MISALIGNMENT (dr) % elmsize)
1132 if (vect_print_dump_info (REPORT_DETAILS))
1133 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1134 return false;
1138 if (!known_alignment_for_access_p (dr))
1140 tree type = (TREE_TYPE (DR_REF (dr)));
1141 tree ba = DR_BASE_OBJECT (dr);
1142 bool is_packed = false;
1144 if (ba)
1145 is_packed = contains_packed_reference (ba);
1147 if (vect_print_dump_info (REPORT_DETAILS))
1148         fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
1149 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1150 return true;
1151 else
1152 return false;
1155 return true;
1159 /* Calculate the cost of the memory access represented by DR. */
1161 static void
1162 vect_get_data_access_cost (struct data_reference *dr,
1163 unsigned int *inside_cost,
1164 unsigned int *outside_cost)
1166 gimple stmt = DR_STMT (dr);
1167 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1168 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1169 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1170 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1171 int ncopies = vf / nunits;
1172 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1174 if (!supportable_dr_alignment)
1175 *inside_cost = VECT_MAX_COST;
1176 else
1178 if (DR_IS_READ (dr))
1179 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1180 else
1181 vect_get_store_cost (dr, ncopies, inside_cost);
1184 if (vect_print_dump_info (REPORT_COST))
1185 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1186 "outside_cost = %d.", *inside_cost, *outside_cost);
1190 static hashval_t
1191 vect_peeling_hash (const void *elem)
1193 const struct _vect_peel_info *peel_info;
1195 peel_info = (const struct _vect_peel_info *) elem;
1196 return (hashval_t) peel_info->npeel;
1200 static int
1201 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1203 const struct _vect_peel_info *a, *b;
1205 a = (const struct _vect_peel_info *) elem1;
1206 b = (const struct _vect_peel_info *) elem2;
1207 return (a->npeel == b->npeel);
1211 /* Insert DR into peeling hash table with NPEEL as key. */
1213 static void
1214 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1215 int npeel)
1217 struct _vect_peel_info elem, *slot;
1218 void **new_slot;
1219 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1221 elem.npeel = npeel;
1222 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1223 &elem);
1224 if (slot)
1225 slot->count++;
1226 else
1228 slot = XNEW (struct _vect_peel_info);
1229 slot->npeel = npeel;
1230 slot->dr = dr;
1231 slot->count = 1;
1232 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1233 INSERT);
1234 *new_slot = slot;
1237 if (!supportable_dr_alignment && !flag_vect_cost_model)
1238 slot->count += VECT_MAX_COST;
1242 /* Traverse peeling hash table to find peeling option that aligns maximum
1243 number of data accesses. */
1245 static int
1246 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1248 vect_peel_info elem = (vect_peel_info) *slot;
1249 vect_peel_extended_info max = (vect_peel_extended_info) data;
1251 if (elem->count > max->peel_info.count)
1253 max->peel_info.npeel = elem->npeel;
1254 max->peel_info.count = elem->count;
1255 max->peel_info.dr = elem->dr;
1258 return 1;
1262 /* Traverse peeling hash table and calculate cost for each peeling option.
1263 Find the one with the lowest cost. */
1265 static int
1266 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1268 vect_peel_info elem = (vect_peel_info) *slot;
1269 vect_peel_extended_info min = (vect_peel_extended_info) data;
1270 int save_misalignment, dummy;
1271 unsigned int inside_cost = 0, outside_cost = 0, i;
1272 gimple stmt = DR_STMT (elem->dr);
1273 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1274 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1275 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1276 struct data_reference *dr;
1278 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1280 stmt = DR_STMT (dr);
1281 stmt_info = vinfo_for_stmt (stmt);
1282 /* For interleaving, only the alignment of the first access
1283 matters. */
1284 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1285 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1286 continue;
1288 save_misalignment = DR_MISALIGNMENT (dr);
1289 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1290 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1291 SET_DR_MISALIGNMENT (dr, save_misalignment);
1294 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1295 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1297 if (inside_cost < min->inside_cost
1298 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1300 min->inside_cost = inside_cost;
1301 min->outside_cost = outside_cost;
1302 min->peel_info.dr = elem->dr;
1303 min->peel_info.npeel = elem->npeel;
1306 return 1;
1310 /* Choose best peeling option by traversing peeling hash table and either
1311 choosing an option with the lowest cost (if cost model is enabled) or the
1312 option that aligns as many accesses as possible. */
1314 static struct data_reference *
1315 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1316 unsigned int *npeel)
1318 struct _vect_peel_extended_info res;
1320 res.peel_info.dr = NULL;
1322 if (flag_vect_cost_model)
1324 res.inside_cost = INT_MAX;
1325 res.outside_cost = INT_MAX;
1326 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1327 vect_peeling_hash_get_lowest_cost, &res);
1329 else
1331 res.peel_info.count = 0;
1332 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1333 vect_peeling_hash_get_most_frequent, &res);
1336 *npeel = res.peel_info.npeel;
1337 return res.peel_info.dr;
1341 /* Function vect_enhance_data_refs_alignment
1343 This pass will use loop versioning and loop peeling in order to enhance
1344 the alignment of data references in the loop.
1346 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1347 original loop is to be vectorized. Any other loops that are created by
1348 the transformations performed in this pass - are not supposed to be
1349 vectorized. This restriction will be relaxed.
1351 This pass will require a cost model to guide it whether to apply peeling
1352 or versioning or a combination of the two. For example, the scheme that
1353    Intel uses when given a loop with several memory accesses, is as follows:
1354    choose one memory access ('p') whose alignment you want to force by doing
1355 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1356 other accesses are not necessarily aligned, or (2) use loop versioning to
1357 generate one loop in which all accesses are aligned, and another loop in
1358 which only 'p' is necessarily aligned.
1360 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1361    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1362 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1364 Devising a cost model is the most critical aspect of this work. It will
1365 guide us on which access to peel for, whether to use loop versioning, how
1366 many versions to create, etc. The cost model will probably consist of
1367 generic considerations as well as target specific considerations (on
1368 powerpc for example, misaligned stores are more painful than misaligned
1369 loads).
1371 Here are the general steps involved in alignment enhancements:
1373 -- original loop, before alignment analysis:
1374 for (i=0; i<N; i++){
1375 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1376 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1379 -- After vect_compute_data_refs_alignment:
1380 for (i=0; i<N; i++){
1381 x = q[i]; # DR_MISALIGNMENT(q) = 3
1382 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1385 -- Possibility 1: we do loop versioning:
1386 if (p is aligned) {
1387 for (i=0; i<N; i++){ # loop 1A
1388 x = q[i]; # DR_MISALIGNMENT(q) = 3
1389 p[i] = y; # DR_MISALIGNMENT(p) = 0
1392 else {
1393 for (i=0; i<N; i++){ # loop 1B
1394 x = q[i]; # DR_MISALIGNMENT(q) = 3
1395 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1399 -- Possibility 2: we do loop peeling:
1400 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1401 x = q[i];
1402 p[i] = y;
1404 for (i = 3; i < N; i++){ # loop 2A
1405 x = q[i]; # DR_MISALIGNMENT(q) = 0
1406 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1409 -- Possibility 3: combination of loop peeling and versioning:
1410 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1411 x = q[i];
1412 p[i] = y;
1414 if (p is aligned) {
1415 for (i = 3; i<N; i++){ # loop 3A
1416 x = q[i]; # DR_MISALIGNMENT(q) = 0
1417 p[i] = y; # DR_MISALIGNMENT(p) = 0
1420 else {
1421 for (i = 3; i<N; i++){ # loop 3B
1422 x = q[i]; # DR_MISALIGNMENT(q) = 0
1423 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1427 These loops are later passed to loop_transform to be vectorized. The
1428 vectorizer will use the alignment information to guide the transformation
1429 (whether to generate regular loads/stores, or with special handling for
1430 misalignment). */
1432 bool
1433 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1435 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1436 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1437 enum dr_alignment_support supportable_dr_alignment;
1438 struct data_reference *dr0 = NULL, *first_store = NULL;
1439 struct data_reference *dr;
1440 unsigned int i, j;
1441 bool do_peeling = false;
1442 bool do_versioning = false;
1443 bool stat;
1444 gimple stmt;
1445 stmt_vec_info stmt_info;
1446 int vect_versioning_for_alias_required;
1447 unsigned int npeel = 0;
1448 bool all_misalignments_unknown = true;
1449 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1450 unsigned possible_npeel_number = 1;
1451 tree vectype;
1452 unsigned int nelements, mis, same_align_drs_max = 0;
1454 if (vect_print_dump_info (REPORT_DETAILS))
1455 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1457 /* While cost model enhancements are expected in the future, the high level
1458 view of the code at this time is as follows:
1460 A) If there is a misaligned access then see if peeling to align
1461 this access can make all data references satisfy
1462 vect_supportable_dr_alignment. If so, update data structures
1463 as needed and return true.
1465 B) If peeling wasn't possible and there is a data reference with an
1466 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1467 then see if loop versioning checks can be used to make all data
1468 references satisfy vect_supportable_dr_alignment. If so, update
1469 data structures as needed and return true.
1471 C) If neither peeling nor versioning were successful then return false if
1472 any data reference does not satisfy vect_supportable_dr_alignment.
1474 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1476 Note, Possibility 3 above (which is peeling and versioning together) is not
1477 being done at this time. */
1479 /* (1) Peeling to force alignment. */
1481 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1482 Considerations:
1483 + How many accesses will become aligned due to the peeling
1484 - How many accesses will become unaligned due to the peeling,
1485 and the cost of misaligned accesses.
1486 - The cost of peeling (the extra runtime checks, the increase
1487 in code size). */
1489 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1491 stmt = DR_STMT (dr);
1492 stmt_info = vinfo_for_stmt (stmt);
1494 /* For interleaving, only the alignment of the first access
1495 matters. */
1496 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1497 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1498 continue;
1500 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1501 do_peeling = vector_alignment_reachable_p (dr);
1502 if (do_peeling)
1504 if (known_alignment_for_access_p (dr))
1506 unsigned int npeel_tmp;
1507 bool negative = tree_int_cst_compare (DR_STEP (dr),
1508 size_zero_node) < 0;
1510 /* Save info about DR in the hash table. */
1511 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1512 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1513 htab_create (1, vect_peeling_hash,
1514 vect_peeling_hash_eq, free);
1516 vectype = STMT_VINFO_VECTYPE (stmt_info);
1517 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1518 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1519 TREE_TYPE (DR_REF (dr))));
1520 npeel_tmp = (negative
1521 ? (mis - nelements) : (nelements - mis))
1522 & (nelements - 1);
1524 /* For multiple types, it is possible that the bigger type access
1525 will have more than one peeling option. E.g., a loop with two
1526 types: one of size (vector size / 4), and the other one of
1527                  size (vector size / 8). The vectorization factor will be 8. If both
1528                  accesses are misaligned by 3, the first one needs one scalar
1529                  iteration to be aligned, and the second one needs 5. But
1530                  the first one will also be aligned by peeling 5 scalar
1531 iterations, and in that case both accesses will be aligned.
1532 Hence, except for the immediate peeling amount, we also want
1533 to try to add full vector size, while we don't exceed
1534 vectorization factor.
1535                  We do this automatically for the cost model, since we calculate the cost
1536 for every peeling option. */
1537 if (!flag_vect_cost_model)
1538                possible_npeel_number = vf / nelements;
1540 /* Handle the aligned case. We may decide to align some other
1541 access, making DR unaligned. */
1542 if (DR_MISALIGNMENT (dr) == 0)
1544 npeel_tmp = 0;
1545 if (!flag_vect_cost_model)
1546 possible_npeel_number++;
1549 for (j = 0; j < possible_npeel_number; j++)
1551 gcc_assert (npeel_tmp <= vf);
1552 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1553 npeel_tmp += nelements;
1556 all_misalignments_unknown = false;
1557 /* Data-ref that was chosen for the case that all the
1558 misalignments are unknown is not relevant anymore, since we
1559 have a data-ref with known alignment. */
1560 dr0 = NULL;
1562 else
1564 /* If we don't know all the misalignment values, we prefer
1565 peeling for data-ref that has maximum number of data-refs
1566 with the same alignment, unless the target prefers to align
1567              stores over loads. */
1568 if (all_misalignments_unknown)
1570 if (same_align_drs_max < VEC_length (dr_p,
1571 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1572 || !dr0)
1574 same_align_drs_max = VEC_length (dr_p,
1575 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1576 dr0 = dr;
1579 if (!first_store && DR_IS_WRITE (dr))
1580 first_store = dr;
1583 /* If there are both known and unknown misaligned accesses in the
1584 loop, we choose peeling amount according to the known
1585 accesses. */
1588 if (!supportable_dr_alignment)
1590 dr0 = dr;
1591 if (!first_store && DR_IS_WRITE (dr))
1592 first_store = dr;
1596 else
1598 if (!aligned_access_p (dr))
1600 if (vect_print_dump_info (REPORT_DETAILS))
1601 fprintf (vect_dump, "vector alignment may not be reachable");
1603 break;
1608 vect_versioning_for_alias_required
1609 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1611 /* Temporarily, if versioning for alias is required, we disable peeling
1612 until we support peeling and versioning. Often peeling for alignment
1613 will require peeling for loop-bound, which in turn requires that we
1614 know how to adjust the loop ivs after the loop. */
1615 if (vect_versioning_for_alias_required
1616 || !vect_can_advance_ivs_p (loop_vinfo)
1617 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1618 do_peeling = false;
1620 if (do_peeling && all_misalignments_unknown
1621 && vect_supportable_dr_alignment (dr0, false))
1624 /* Check if the target requires to prefer stores over loads, i.e., if
1625 misaligned stores are more expensive than misaligned loads (taking
1626 drs with same alignment into account). */
1627 if (first_store && DR_IS_READ (dr0))
1629 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1630 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1631 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1632 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1634 vect_get_data_access_cost (dr0, &load_inside_cost,
1635 &load_outside_cost);
1636 vect_get_data_access_cost (first_store, &store_inside_cost,
1637 &store_outside_cost);
1639 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1640 aligning the load DR0). */
1641 load_inside_penalty = store_inside_cost;
1642 load_outside_penalty = store_outside_cost;
1643 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1644 (vinfo_for_stmt (DR_STMT (first_store))),
1645 i, dr);
1646 i++)
1647 if (DR_IS_READ (dr))
1649 load_inside_penalty += load_inside_cost;
1650 load_outside_penalty += load_outside_cost;
1652 else
1654 load_inside_penalty += store_inside_cost;
1655 load_outside_penalty += store_outside_cost;
1658 /* Calculate the penalty for leaving DR0 unaligned (by
1659 aligning the FIRST_STORE). */
1660 store_inside_penalty = load_inside_cost;
1661 store_outside_penalty = load_outside_cost;
1662 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1663 (vinfo_for_stmt (DR_STMT (dr0))),
1664 i, dr);
1665 i++)
1666 if (DR_IS_READ (dr))
1668 store_inside_penalty += load_inside_cost;
1669 store_outside_penalty += load_outside_cost;
1671 else
1673 store_inside_penalty += store_inside_cost;
1674 store_outside_penalty += store_outside_cost;
1677 if (load_inside_penalty > store_inside_penalty
1678 || (load_inside_penalty == store_inside_penalty
1679 && load_outside_penalty > store_outside_penalty))
1680 dr0 = first_store;
1683 /* In case there are only loads with different unknown misalignments, use
1684 peeling only if it may help to align other accesses in the loop. */
1685 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1686 (vinfo_for_stmt (DR_STMT (dr0))))
1687 && vect_supportable_dr_alignment (dr0, false)
1688 != dr_unaligned_supported)
1689 do_peeling = false;
1692 if (do_peeling && !dr0)
1694 /* Peeling is possible, but there is no data access that is not supported
1695 unless aligned. So we try to choose the best possible peeling. */
1697 /* We should get here only if there are drs with known misalignment. */
1698 gcc_assert (!all_misalignments_unknown);
1700 /* Choose the best peeling from the hash table. */
1701 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1702 if (!dr0 || !npeel)
1703 do_peeling = false;
1706 if (do_peeling)
1708 stmt = DR_STMT (dr0);
1709 stmt_info = vinfo_for_stmt (stmt);
1710 vectype = STMT_VINFO_VECTYPE (stmt_info);
1711 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1713 if (known_alignment_for_access_p (dr0))
1715 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1716 size_zero_node) < 0;
1717 if (!npeel)
1719 /* Since it's known at compile time, compute the number of
1720 iterations in the peeled loop (the peeling factor) for use in
1721 updating DR_MISALIGNMENT values. The peeling factor is the
1722 vectorization factor minus the misalignment as an element
1723 count. */
1724 mis = DR_MISALIGNMENT (dr0);
1725 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1726 npeel = ((negative ? mis - nelements : nelements - mis)
1727 & (nelements - 1));
1730 /* For interleaved data access every iteration accesses all the
1731 members of the group, therefore we divide the number of iterations
1732 by the group size. */
1733 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1734 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1735 npeel /= DR_GROUP_SIZE (stmt_info);
1737 if (vect_print_dump_info (REPORT_DETAILS))
1738 fprintf (vect_dump, "Try peeling by %d", npeel);
1741 /* Ensure that all data refs can be vectorized after the peel. */
1742 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1744 int save_misalignment;
1746 if (dr == dr0)
1747 continue;
1749 stmt = DR_STMT (dr);
1750 stmt_info = vinfo_for_stmt (stmt);
1751 /* For interleaving, only the alignment of the first access
1752 matters. */
1753 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1754 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1755 continue;
1757 save_misalignment = DR_MISALIGNMENT (dr);
1758 vect_update_misalignment_for_peel (dr, dr0, npeel);
1759 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1760 SET_DR_MISALIGNMENT (dr, save_misalignment);
1762 if (!supportable_dr_alignment)
1764 do_peeling = false;
1765 break;
1769 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1771 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1772 if (!stat)
1773 do_peeling = false;
1774 else
1775 return stat;
1778 if (do_peeling)
1780 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1781 If the misalignment of DR_i is identical to that of dr0 then set
1782 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1783 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1784 by the peeling factor times the element size of DR_i (MOD the
1785 vectorization factor times the size). Otherwise, the
1786 misalignment of DR_i must be set to unknown. */
1787 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1788 if (dr != dr0)
1789 vect_update_misalignment_for_peel (dr, dr0, npeel);
1791 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1792 if (npeel)
1793 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1794 else
1795 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1796 SET_DR_MISALIGNMENT (dr0, 0);
1797 if (vect_print_dump_info (REPORT_ALIGNMENT))
1798 fprintf (vect_dump, "Alignment of access forced using peeling.");
1800 if (vect_print_dump_info (REPORT_DETAILS))
1801 fprintf (vect_dump, "Peeling for alignment will be applied.");
1803 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1804 gcc_assert (stat);
1805 return stat;
1810 /* (2) Versioning to force alignment. */
1812 /* Try versioning if:
1813 1) flag_tree_vect_loop_version is TRUE
1814 2) optimize loop for speed
1815 3) there is at least one unsupported misaligned data ref with an unknown
1816 misalignment, and
1817 4) all misaligned data refs with a known misalignment are supported, and
1818 5) the number of runtime alignment checks is within reason. */
1820 do_versioning =
1821 flag_tree_vect_loop_version
1822 && optimize_loop_nest_for_speed_p (loop)
1823 && (!loop->inner); /* FORNOW */
1825 if (do_versioning)
1827 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1829 stmt = DR_STMT (dr);
1830 stmt_info = vinfo_for_stmt (stmt);
1832 /* For interleaving, only the alignment of the first access
1833 matters. */
1834 if (aligned_access_p (dr)
1835 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1836 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1837 continue;
1839 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1841 if (!supportable_dr_alignment)
1843 gimple stmt;
1844 int mask;
1845 tree vectype;
1847 if (known_alignment_for_access_p (dr)
1848 || VEC_length (gimple,
1849 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1850 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1852 do_versioning = false;
1853 break;
1856 stmt = DR_STMT (dr);
1857 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1858 gcc_assert (vectype);
1860 /* The rightmost bits of an aligned address must be zeros.
1861 Construct the mask needed for this test. For example,
1862 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1863 mask must be 15 = 0xf. */
1864 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
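/* A sketch of the runtime test this mask feeds (the guard itself is emitted
later, when the loop gets versioned): the addresses of all recorded
may-misalign references are OR-ed together and the aligned version of the
loop is entered only if ((addr_1 | addr_2 | ...) & mask) == 0, so a single
comparison checks the alignment of every reference at once. */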
1866 /* FORNOW: use the same mask to test all potentially unaligned
1867 references in the loop. The vectorizer currently supports
1868 a single vector size, see the reference to
1869 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1870 vectorization factor is computed. */
1871 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1872 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1873 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1874 VEC_safe_push (gimple, heap,
1875 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1876 DR_STMT (dr));
1880 /* Versioning requires at least one misaligned data reference. */
1881 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1882 do_versioning = false;
1883 else if (!do_versioning)
1884 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1887 if (do_versioning)
1889 VEC(gimple,heap) *may_misalign_stmts
1890 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1891 gimple stmt;
1893 /* It can now be assumed that the data references in the statements
1894 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1895 of the loop being vectorized. */
1896 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1898 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1899 dr = STMT_VINFO_DATA_REF (stmt_info);
1900 SET_DR_MISALIGNMENT (dr, 0);
1901 if (vect_print_dump_info (REPORT_ALIGNMENT))
1902 fprintf (vect_dump, "Alignment of access forced using versioning.");
1905 if (vect_print_dump_info (REPORT_DETAILS))
1906 fprintf (vect_dump, "Versioning for alignment will be applied.");
1908 /* Peeling and versioning can't be done together at this time. */
1909 gcc_assert (! (do_peeling && do_versioning));
1911 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1912 gcc_assert (stat);
1913 return stat;
1916 /* This point is reached if neither peeling nor versioning is being done. */
1917 gcc_assert (! (do_peeling || do_versioning));
1919 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1920 return stat;
1924 /* Function vect_find_same_alignment_drs.
1926 Update group and alignment relations according to the chosen
1927 vectorization factor. */
1929 static void
1930 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1931 loop_vec_info loop_vinfo)
1933 unsigned int i;
1934 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1935 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1936 struct data_reference *dra = DDR_A (ddr);
1937 struct data_reference *drb = DDR_B (ddr);
1938 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1939 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1940 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1941 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1942 lambda_vector dist_v;
1943 unsigned int loop_depth;
1945 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1946 return;
1948 if (dra == drb)
1949 return;
1951 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1952 return;
1954 /* Loop-based vectorization and known data dependence. */
1955 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1956 return;
1958 /* Data-dependence analysis reports a distance vector of zero
1959 for data-references that overlap only in the first iteration
1960 but whose steps have different signs (see PR45764).
1961 So as a sanity check require equal DR_STEP. */
1962 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1963 return;
1965 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1966 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1968 int dist = dist_v[loop_depth];
1970 if (vect_print_dump_info (REPORT_DR_DETAILS))
1971 fprintf (vect_dump, "dependence distance = %d.", dist);
1973 /* Same loop iteration. */
1974 if (dist == 0
1975 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1977 /* References at distance zero, or at a multiple of VF with equal element sizes, have the same alignment. */
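/* E.g. with a vectorization factor of 4 and int accesses a[i] and a[i+4],
the dependence distance 4 is a multiple of VF and both element sizes are
4 bytes, so the two references always share the same misalignment. */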
1978 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1979 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1980 if (vect_print_dump_info (REPORT_ALIGNMENT))
1981 fprintf (vect_dump, "accesses have the same alignment.");
1982 if (vect_print_dump_info (REPORT_DR_DETAILS))
1984 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1985 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1986 fprintf (vect_dump, " and ");
1987 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1994 /* Function vect_analyze_data_refs_alignment
1996 Analyze the alignment of the data-references in the loop.
1997 Return FALSE if a data reference is found that cannot be vectorized. */
1999 bool
2000 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2001 bb_vec_info bb_vinfo)
2003 if (vect_print_dump_info (REPORT_DETAILS))
2004 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2006 /* Mark groups of data references with same alignment using
2007 data dependence information. */
2008 if (loop_vinfo)
2010 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2011 struct data_dependence_relation *ddr;
2012 unsigned int i;
2014 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2015 vect_find_same_alignment_drs (ddr, loop_vinfo);
2018 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2020 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2021 fprintf (vect_dump,
2022 "not vectorized: can't calculate alignment for data ref.");
2023 return false;
2026 return true;
2030 /* Analyze groups of strided accesses: check that DR belongs to a group of
2031 strided accesses of legal size, step, etc. Detect gaps, single element
2032 interleaving, and other special cases. Set strided access info.
2033 Collect groups of strided stores for further use in SLP analysis. */
2035 static bool
2036 vect_analyze_group_access (struct data_reference *dr)
2038 tree step = DR_STEP (dr);
2039 tree scalar_type = TREE_TYPE (DR_REF (dr));
2040 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2041 gimple stmt = DR_STMT (dr);
2042 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2043 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2044 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2045 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2046 HOST_WIDE_INT stride;
2047 bool slp_impossible = false;
2049 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2050 interleaving group (including gaps). */
2051 stride = dr_step / type_size;
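/* E.g. int elements (type_size == 4) accessed with a 32-byte step give a
STRIDE of 8: the group spans 8 elements per iteration, counting both the
members that are actually accessed and any gaps between them. */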
2053 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2054 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2056 /* Check whether this DR is part of an interleaving group, and is a single
2057 element of the group that is accessed in the loop. */
2059 /* Gaps are supported only for loads. STEP must be a multiple of the type
2060 size. The size of the group must be a power of 2. */
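/* E.g. a load of a[4*i] with int elements has a 16-byte step and a 4-byte
type, so it is recorded as a group of size 4 with this DR as its only
member; the remaining 3 elements of each iteration are gaps. */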
2061 if (DR_IS_READ (dr)
2062 && (dr_step % type_size) == 0
2063 && stride > 0
2064 && exact_log2 (stride) != -1)
2066 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2067 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2068 if (vect_print_dump_info (REPORT_DR_DETAILS))
2070 fprintf (vect_dump, "Detected single element interleaving ");
2071 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2072 fprintf (vect_dump, " step ");
2073 print_generic_expr (vect_dump, step, TDF_SLIM);
2075 return true;
2078 if (vect_print_dump_info (REPORT_DETAILS))
2080 fprintf (vect_dump, "not consecutive access ");
2081 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2084 if (bb_vinfo)
2086 /* Mark the statement as unvectorizable. */
2087 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2088 return true;
2091 return false;
2094 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2096 /* First stmt in the interleaving chain. Check the chain. */
2097 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2098 struct data_reference *data_ref = dr;
2099 unsigned int count = 1;
2100 tree next_step;
2101 tree prev_init = DR_INIT (data_ref);
2102 gimple prev = stmt;
2103 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2105 while (next)
2107 /* Skip same data-refs. If two or more stmts share the same
2108 data-ref (supported only for loads), we vectorize only the first
2109 stmt, and the rest get their vectorized loads from the first
2110 one. */
2111 if (!tree_int_cst_compare (DR_INIT (data_ref),
2112 DR_INIT (STMT_VINFO_DATA_REF (
2113 vinfo_for_stmt (next)))))
2115 if (DR_IS_WRITE (data_ref))
2117 if (vect_print_dump_info (REPORT_DETAILS))
2118 fprintf (vect_dump, "Two store stmts share the same dr.");
2119 return false;
2122 /* Check that there are no load-store dependences for these loads,
2123 to prevent a load-store-load sequence to the same location. */
2124 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2125 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2127 if (vect_print_dump_info (REPORT_DETAILS))
2128 fprintf (vect_dump,
2129 "READ_WRITE dependence in interleaving.");
2130 return false;
2133 /* For loads, use the same data-ref load. */
2134 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2136 prev = next;
2137 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2138 continue;
2140 prev = next;
2142 /* Check that all the accesses have the same STEP. */
2143 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2144 if (tree_int_cst_compare (step, next_step))
2146 if (vect_print_dump_info (REPORT_DETAILS))
2147 fprintf (vect_dump, "not consecutive access in interleaving");
2148 return false;
2151 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2152 /* Check that the distance between two accesses is equal to the type
2153 size. Otherwise, we have gaps. */
2154 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2155 - TREE_INT_CST_LOW (prev_init)) / type_size;
2156 if (diff != 1)
2158 /* FORNOW: SLP of accesses with gaps is not supported. */
2159 slp_impossible = true;
2160 if (DR_IS_WRITE (data_ref))
2162 if (vect_print_dump_info (REPORT_DETAILS))
2163 fprintf (vect_dump, "interleaved store with gaps");
2164 return false;
2167 gaps += diff - 1;
2170 /* Store the gap from the previous member of the group. If there is no
2171 gap in the access, DR_GROUP_GAP is always 1. */
2172 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2174 prev_init = DR_INIT (data_ref);
2175 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2176 /* Count the number of data-refs in the chain. */
2177 count++;
2180 /* COUNT is the number of accesses found; we multiply it by the size of
2181 the type to get COUNT_IN_BYTES. */
2182 count_in_bytes = type_size * count;
2184 /* Check that the size of the interleaving (including gaps) is not
2185 greater than STEP. */
2186 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2188 if (vect_print_dump_info (REPORT_DETAILS))
2190 fprintf (vect_dump, "interleaving size is greater than step for ");
2191 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2193 return false;
2196 /* Check that the size of the interleaving is equal to STEP for stores,
2197 i.e., that there are no gaps. */
2198 if (dr_step && dr_step != count_in_bytes)
2200 if (DR_IS_READ (dr))
2202 slp_impossible = true;
2203 /* There is a gap after the last load in the group. This gap is the
2204 difference between the stride and the number of elements. When
2205 there is no gap, this difference should be 0. */
2206 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
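/* E.g. for a group with STRIDE 4 of which only 3 loads are present,
DR_GROUP_GAP becomes 1: one element is skipped after the last load of
every iteration. */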
2208 else
2210 if (vect_print_dump_info (REPORT_DETAILS))
2211 fprintf (vect_dump, "interleaved store with gaps");
2212 return false;
2216 /* Check that STEP is a multiple of type size. */
2217 if (dr_step && (dr_step % type_size) != 0)
2219 if (vect_print_dump_info (REPORT_DETAILS))
2221 fprintf (vect_dump, "step is not a multiple of type size: step ");
2222 print_generic_expr (vect_dump, step, TDF_SLIM);
2223 fprintf (vect_dump, " size ");
2224 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2225 TDF_SLIM);
2227 return false;
2230 /* FORNOW: we handle only interleaving that is a power of 2.
2231 We don't fail here if it may still be possible to vectorize the
2232 group using SLP. If not, the size of the group will be checked in
2233 vect_analyze_operations, and the vectorization will fail. */
2234 if (exact_log2 (stride) == -1)
2236 if (vect_print_dump_info (REPORT_DETAILS))
2237 fprintf (vect_dump, "interleaving is not a power of 2");
2239 if (slp_impossible)
2240 return false;
2243 if (stride == 0)
2244 stride = count;
2246 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2247 if (vect_print_dump_info (REPORT_DETAILS))
2248 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2250 /* SLP: create an SLP data structure for every interleaving group of
2251 stores for further analysis in vect_analyze_slp. */
2252 if (DR_IS_WRITE (dr) && !slp_impossible)
2254 if (loop_vinfo)
2255 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2256 stmt);
2257 if (bb_vinfo)
2258 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2259 stmt);
2263 return true;
2267 /* Analyze the access pattern of the data-reference DR.
2268 In case of non-consecutive accesses call vect_analyze_group_access() to
2269 analyze groups of strided accesses. */
2271 static bool
2272 vect_analyze_data_ref_access (struct data_reference *dr)
2274 tree step = DR_STEP (dr);
2275 tree scalar_type = TREE_TYPE (DR_REF (dr));
2276 gimple stmt = DR_STMT (dr);
2277 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2278 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2279 struct loop *loop = NULL;
2280 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2282 if (loop_vinfo)
2283 loop = LOOP_VINFO_LOOP (loop_vinfo);
2285 if (loop_vinfo && !step)
2287 if (vect_print_dump_info (REPORT_DETAILS))
2288 fprintf (vect_dump, "bad data-ref access in loop");
2289 return false;
2292 /* Don't allow invariant accesses in loops. */
2293 if (loop_vinfo && dr_step == 0)
2294 return false;
2296 if (loop && nested_in_vect_loop_p (loop, stmt))
2298 /* Interleaved accesses are not yet supported within outer-loop
2299 vectorization for references in the inner-loop. */
2300 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2302 /* For the rest of the analysis we use the outer-loop step. */
2303 step = STMT_VINFO_DR_STEP (stmt_info);
2304 dr_step = TREE_INT_CST_LOW (step);
2306 if (dr_step == 0)
2308 if (vect_print_dump_info (REPORT_ALIGNMENT))
2309 fprintf (vect_dump, "zero step in outer loop.");
2310 if (DR_IS_READ (dr))
2311 return true;
2312 else
2313 return false;
2317 /* Consecutive? */
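/* I.e. the step equals the element size (e.g. a[i] with 4-byte ints and a
step of 4), or is its exact negation for an access that walks backwards
through memory. */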
2318 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2319 || (dr_step < 0
2320 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2322 /* Mark that it is not interleaving. */
2323 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2324 return true;
2327 if (loop && nested_in_vect_loop_p (loop, stmt))
2329 if (vect_print_dump_info (REPORT_ALIGNMENT))
2330 fprintf (vect_dump, "strided access in outer loop.");
2331 return false;
2334 /* Non-consecutive access - check whether it is part of an interleaving group. */
2335 return vect_analyze_group_access (dr);
2339 /* Function vect_analyze_data_ref_accesses.
2341 Analyze the access pattern of all the data references in the loop.
2343 FORNOW: the only access pattern that is considered vectorizable is a
2344 simple step 1 (consecutive) access.
2346 FORNOW: handle only arrays and pointer accesses. */
2348 bool
2349 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2351 unsigned int i;
2352 VEC (data_reference_p, heap) *datarefs;
2353 struct data_reference *dr;
2355 if (vect_print_dump_info (REPORT_DETAILS))
2356 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2358 if (loop_vinfo)
2359 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2360 else
2361 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2363 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2364 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2365 && !vect_analyze_data_ref_access (dr))
2367 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2368 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2370 if (bb_vinfo)
2372 /* Mark the statement as not vectorizable. */
2373 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2374 continue;
2376 else
2377 return false;
2380 return true;
2383 /* Function vect_prune_runtime_alias_test_list.
2385 Prune a list of ddrs to be tested at run-time by versioning for alias.
2386 Return FALSE if the resulting list of ddrs is longer than allowed by
2387 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
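/* For example, if two loads from a[i] are each involved in a may-alias
dependence with the same store to b[i], both ddrs cover identical address
ranges, so a single runtime check suffices and the duplicate is dropped; if
the checks that remain still exceed the parameter above, versioning for
alias is abandoned. */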
2389 bool
2390 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2392 VEC (ddr_p, heap) * ddrs =
2393 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2394 unsigned i, j;
2396 if (vect_print_dump_info (REPORT_DETAILS))
2397 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2399 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2401 bool found;
2402 ddr_p ddr_i;
2404 ddr_i = VEC_index (ddr_p, ddrs, i);
2405 found = false;
2407 for (j = 0; j < i; j++)
2409 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2411 if (vect_vfa_range_equal (ddr_i, ddr_j))
2413 if (vect_print_dump_info (REPORT_DR_DETAILS))
2415 fprintf (vect_dump, "found equal ranges ");
2416 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2417 fprintf (vect_dump, ", ");
2418 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2419 fprintf (vect_dump, " and ");
2420 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2421 fprintf (vect_dump, ", ");
2422 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2424 found = true;
2425 break;
2429 if (found)
2431 VEC_ordered_remove (ddr_p, ddrs, i);
2432 continue;
2434 i++;
2437 if (VEC_length (ddr_p, ddrs) >
2438 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2440 if (vect_print_dump_info (REPORT_DR_DETAILS))
2442 fprintf (vect_dump,
2443 "disable versioning for alias - max number of generated "
2444 "checks exceeded.");
2447 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2449 return false;
2452 return true;
2456 /* Function vect_analyze_data_refs.
2458 Find all the data references in the loop or basic block.
2460 The general structure of the analysis of data refs in the vectorizer is as
2461 follows:
2462 1- vect_analyze_data_refs(loop/bb): call
2463 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2464 in the loop/bb and their dependences.
2465 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2466 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2467 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2471 bool
2472 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2473 bb_vec_info bb_vinfo,
2474 int *min_vf)
2476 struct loop *loop = NULL;
2477 basic_block bb = NULL;
2478 unsigned int i;
2479 VEC (data_reference_p, heap) *datarefs;
2480 struct data_reference *dr;
2481 tree scalar_type;
2482 bool res;
2484 if (vect_print_dump_info (REPORT_DETAILS))
2485 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2487 if (loop_vinfo)
2489 loop = LOOP_VINFO_LOOP (loop_vinfo);
2490 res = compute_data_dependences_for_loop
2491 (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
2492 &LOOP_VINFO_DDRS (loop_vinfo));
2494 if (!res)
2496 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2497 fprintf (vect_dump, "not vectorized: loop contains function calls"
2498 " or data references that cannot be analyzed");
2499 return false;
2502 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2504 else
2506 bb = BB_VINFO_BB (bb_vinfo);
2507 res = compute_data_dependences_for_bb (bb, true,
2508 &BB_VINFO_DATAREFS (bb_vinfo),
2509 &BB_VINFO_DDRS (bb_vinfo));
2510 if (!res)
2512 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2513 fprintf (vect_dump, "not vectorized: basic block contains function"
2514 " calls or data references that cannot be analyzed");
2515 return false;
2518 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2521 /* Go through the data-refs, check that the analysis succeeded. Update
2522 pointer from stmt_vec_info struct to DR and vectype. */
2524 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2526 gimple stmt;
2527 stmt_vec_info stmt_info;
2528 tree base, offset, init;
2529 int vf;
2531 if (!dr || !DR_REF (dr))
2533 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2534 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2535 return false;
2538 stmt = DR_STMT (dr);
2539 stmt_info = vinfo_for_stmt (stmt);
2541 /* Check that analysis of the data-ref succeeded. */
2542 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2543 || !DR_STEP (dr))
2545 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2547 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2548 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2551 if (bb_vinfo)
2553 /* Mark the statement as not vectorizable. */
2554 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2555 continue;
2557 else
2558 return false;
2561 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2563 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2564 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2565 "constant");
2566 if (bb_vinfo)
2568 /* Mark the statement as not vectorizable. */
2569 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2570 continue;
2572 else
2573 return false;
2576 base = unshare_expr (DR_BASE_ADDRESS (dr));
2577 offset = unshare_expr (DR_OFFSET (dr));
2578 init = unshare_expr (DR_INIT (dr));
2580 if (stmt_could_throw_p (stmt))
2582 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2584 fprintf (vect_dump, "not vectorized: statement can throw an "
2585 "exception ");
2586 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2588 return false;
2591 /* Update DR field in stmt_vec_info struct. */
2593 /* If the dataref is in an inner-loop of the loop that is considered
2594 for vectorization, we also want to analyze the access relative to
2595 the outer-loop (DR contains information only relative to the
2596 inner-most enclosing loop). We do that by building a reference to the
2597 first location accessed by the inner-loop, and analyzing it relative to
2598 the outer-loop. */
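/* For example, for the access in[i+j] used in the comment of
vect_create_addr_base_for_vector_ref below ('in' pointing to short), the
first location touched by the inner j-loop is in[i]; relative to the outer
i-loop that location advances by 2 bytes per iteration, which becomes the
outer-loop step recorded below. */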
2599 if (loop && nested_in_vect_loop_p (loop, stmt))
2601 tree outer_step, outer_base, outer_init;
2602 HOST_WIDE_INT pbitsize, pbitpos;
2603 tree poffset;
2604 enum machine_mode pmode;
2605 int punsignedp, pvolatilep;
2606 affine_iv base_iv, offset_iv;
2607 tree dinit;
2609 /* Build a reference to the first location accessed by the
2610 inner-loop: *(BASE+INIT). (The first location is actually
2611 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2612 tree inner_base = build_fold_indirect_ref
2613 (fold_build2 (POINTER_PLUS_EXPR,
2614 TREE_TYPE (base), base,
2615 fold_convert (sizetype, init)));
2617 if (vect_print_dump_info (REPORT_DETAILS))
2619 fprintf (vect_dump, "analyze in outer-loop: ");
2620 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2623 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2624 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2625 gcc_assert (outer_base != NULL_TREE);
2627 if (pbitpos % BITS_PER_UNIT != 0)
2629 if (vect_print_dump_info (REPORT_DETAILS))
2630 fprintf (vect_dump, "failed: bit offset alignment.\n");
2631 return false;
2634 outer_base = build_fold_addr_expr (outer_base);
2635 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2636 &base_iv, false))
2638 if (vect_print_dump_info (REPORT_DETAILS))
2639 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2640 return false;
2643 if (offset)
2645 if (poffset)
2646 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2647 poffset);
2648 else
2649 poffset = offset;
2652 if (!poffset)
2654 offset_iv.base = ssize_int (0);
2655 offset_iv.step = ssize_int (0);
2657 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2658 &offset_iv, false))
2660 if (vect_print_dump_info (REPORT_DETAILS))
2661 fprintf (vect_dump, "evolution of offset is not affine.\n");
2662 return false;
2665 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2666 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2667 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2668 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2669 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2671 outer_step = size_binop (PLUS_EXPR,
2672 fold_convert (ssizetype, base_iv.step),
2673 fold_convert (ssizetype, offset_iv.step));
2675 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2676 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2677 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2678 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2679 STMT_VINFO_DR_OFFSET (stmt_info) =
2680 fold_convert (ssizetype, offset_iv.base);
2681 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2682 size_int (highest_pow2_factor (offset_iv.base));
2684 if (vect_print_dump_info (REPORT_DETAILS))
2686 fprintf (vect_dump, "\touter base_address: ");
2687 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2688 fprintf (vect_dump, "\n\touter offset from base address: ");
2689 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2690 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2691 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2692 fprintf (vect_dump, "\n\touter step: ");
2693 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2694 fprintf (vect_dump, "\n\touter aligned to: ");
2695 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2699 if (STMT_VINFO_DATA_REF (stmt_info))
2701 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2703 fprintf (vect_dump,
2704 "not vectorized: more than one data ref in stmt: ");
2705 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2707 return false;
2710 STMT_VINFO_DATA_REF (stmt_info) = dr;
2712 /* Set vectype for STMT. */
2713 scalar_type = TREE_TYPE (DR_REF (dr));
2714 STMT_VINFO_VECTYPE (stmt_info) =
2715 get_vectype_for_scalar_type (scalar_type);
2716 if (!STMT_VINFO_VECTYPE (stmt_info))
2718 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2720 fprintf (vect_dump,
2721 "not vectorized: no vectype for stmt: ");
2722 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2723 fprintf (vect_dump, " scalar_type: ");
2724 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2727 if (bb_vinfo)
2729 /* Mark the statement as not vectorizable. */
2730 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2731 continue;
2733 else
2734 return false;
2737 /* Adjust the minimal vectorization factor according to the
2738 vector type. */
2739 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2740 if (vf > *min_vf)
2741 *min_vf = vf;
2744 return true;
2748 /* Function vect_get_new_vect_var.
2750 Creates and returns a new temporary variable. The current naming scheme
2751 prepends the prefix "vect_", "stmp_" or "vect_p" (depending on the value
2752 of VAR_KIND) to NAME, if provided, to form the name of the
2753 vectorizer-generated variable. */
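/* For instance, VAR_KIND == vect_pointer_var with NAME == "a" produces a
temporary whose name is based on "vect_pa", while a NULL NAME falls back to
the bare prefix "vect_p". */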
2755 tree
2756 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2758 const char *prefix;
2759 tree new_vect_var;
2761 switch (var_kind)
2763 case vect_simple_var:
2764 prefix = "vect_";
2765 break;
2766 case vect_scalar_var:
2767 prefix = "stmp_";
2768 break;
2769 case vect_pointer_var:
2770 prefix = "vect_p";
2771 break;
2772 default:
2773 gcc_unreachable ();
2776 if (name)
2778 char* tmp = concat (prefix, name, NULL);
2779 new_vect_var = create_tmp_var (type, tmp);
2780 free (tmp);
2782 else
2783 new_vect_var = create_tmp_var (type, prefix);
2785 /* Mark vector typed variable as a gimple register variable. */
2786 if (TREE_CODE (type) == VECTOR_TYPE)
2787 DECL_GIMPLE_REG_P (new_vect_var) = true;
2789 return new_vect_var;
2793 /* Function vect_create_addr_base_for_vector_ref.
2795 Create an expression that computes the address of the first memory location
2796 that will be accessed for a data reference.
2798 Input:
2799 STMT: The statement containing the data reference.
2800 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2801 OFFSET: Optional. If supplied, it will be added to the initial address.
2802 LOOP: Specify relative to which loop nest the address should be computed.
2803 For example, when the dataref is in an inner-loop nested in an
2804 outer-loop that is now being vectorized, LOOP can be either the
2805 outer-loop, or the inner-loop. The first memory location accessed
2806 by the following dataref ('in' points to short):
2808 for (i=0; i<N; i++)
2809 for (j=0; j<M; j++)
2810 s += in[i+j]
2812 is as follows:
2813 if LOOP=i_loop: &in (relative to i_loop)
2814 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2816 Output:
2817 1. Return an SSA_NAME whose value is the address of the memory location of
2818 the first vector of the data reference.
2819 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2820 these statement(s) which define the returned SSA_NAME.
2822 FORNOW: We are only handling array accesses with step 1. */
2824 tree
2825 vect_create_addr_base_for_vector_ref (gimple stmt,
2826 gimple_seq *new_stmt_list,
2827 tree offset,
2828 struct loop *loop)
2830 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2831 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2832 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2833 tree base_name;
2834 tree data_ref_base_var;
2835 tree vec_stmt;
2836 tree addr_base, addr_expr;
2837 tree dest;
2838 gimple_seq seq = NULL;
2839 tree base_offset = unshare_expr (DR_OFFSET (dr));
2840 tree init = unshare_expr (DR_INIT (dr));
2841 tree vect_ptr_type;
2842 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2843 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2844 tree base;
2846 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2848 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2850 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2852 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2853 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2854 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2857 if (loop_vinfo)
2858 base_name = build_fold_indirect_ref (data_ref_base);
2859 else
2861 base_offset = ssize_int (0);
2862 init = ssize_int (0);
2863 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2866 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2867 add_referenced_var (data_ref_base_var);
2868 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2869 data_ref_base_var);
2870 gimple_seq_add_seq (new_stmt_list, seq);
2872 /* Create base_offset */
2873 base_offset = size_binop (PLUS_EXPR,
2874 fold_convert (sizetype, base_offset),
2875 fold_convert (sizetype, init));
2876 dest = create_tmp_var (sizetype, "base_off");
2877 add_referenced_var (dest);
2878 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2879 gimple_seq_add_seq (new_stmt_list, seq);
2881 if (offset)
2883 tree tmp = create_tmp_var (sizetype, "offset");
2885 add_referenced_var (tmp);
2886 offset = fold_build2 (MULT_EXPR, sizetype,
2887 fold_convert (sizetype, offset), step);
2888 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2889 base_offset, offset);
2890 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2891 gimple_seq_add_seq (new_stmt_list, seq);
2894 /* base + base_offset */
2895 if (loop_vinfo)
2896 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2897 data_ref_base, base_offset);
2898 else
2900 addr_base = build1 (ADDR_EXPR,
2901 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2902 unshare_expr (DR_REF (dr)));
2905 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2906 base = get_base_address (DR_REF (dr));
2907 if (base
2908 && TREE_CODE (base) == MEM_REF)
2909 vect_ptr_type
2910 = build_qualified_type (vect_ptr_type,
2911 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2913 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2914 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2915 get_name (base_name));
2916 add_referenced_var (addr_expr);
2917 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2918 gimple_seq_add_seq (new_stmt_list, seq);
2920 if (DR_PTR_INFO (dr)
2921 && TREE_CODE (vec_stmt) == SSA_NAME)
2922 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2924 if (vect_print_dump_info (REPORT_DETAILS))
2926 fprintf (vect_dump, "created ");
2927 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2930 return vec_stmt;
2934 /* Function vect_create_data_ref_ptr.
2936 Create a new pointer to vector type (vp), that points to the first location
2937 accessed in the loop by STMT, along with the def-use update chain to
2938 appropriately advance the pointer through the loop iterations. Also set
2939 aliasing information for the pointer. This vector pointer is used by the
2940 callers to this function to create a memory reference expression for vector
2941 load/store access.
2943 Input:
2944 1. STMT: a stmt that references memory. Expected to be of the form
2945 GIMPLE_ASSIGN <name, data-ref> or
2946 GIMPLE_ASSIGN <data-ref, name>.
2947 2. AT_LOOP: the loop where the vector memref is to be created.
2948 3. OFFSET (optional): an offset to be added to the initial address accessed
2949 by the data-ref in STMT.
2950 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2951 pointing to the initial address.
2952 5. TYPE: if not NULL indicates the required type of the data-ref.
2954 Output:
2955 1. Declare a new ptr to vector_type, and have it point to the base of the
2956 data reference (the initial address accessed by the data reference).
2957 For example, for vector of type V8HI, the following code is generated:
2959 v8hi *vp;
2960 vp = (v8hi *)initial_address;
2962 if OFFSET is not supplied:
2963 initial_address = &a[init];
2964 if OFFSET is supplied:
2965 initial_address = &a[init + OFFSET];
2967 Return the initial_address in INITIAL_ADDRESS.
2969 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2970 update the pointer in each iteration of the loop.
2972 Return the increment stmt that updates the pointer in PTR_INCR.
2974 3. Set INV_P to true if the access pattern of the data reference in the
2975 vectorized loop is invariant. Set it to false otherwise.
2977 4. Return the pointer. */
2979 tree
2980 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2981 tree offset, tree *initial_address, gimple *ptr_incr,
2982 bool only_init, bool *inv_p)
2984 tree base_name;
2985 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2986 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2987 struct loop *loop = NULL;
2988 bool nested_in_vect_loop = false;
2989 struct loop *containing_loop = NULL;
2990 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2991 tree vect_ptr_type;
2992 tree vect_ptr;
2993 tree new_temp;
2994 gimple vec_stmt;
2995 gimple_seq new_stmt_list = NULL;
2996 edge pe = NULL;
2997 basic_block new_bb;
2998 tree vect_ptr_init;
2999 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3000 tree vptr;
3001 gimple_stmt_iterator incr_gsi;
3002 bool insert_after;
3003 bool negative;
3004 tree indx_before_incr, indx_after_incr;
3005 gimple incr;
3006 tree step;
3007 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3008 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3009 tree base;
3011 if (loop_vinfo)
3013 loop = LOOP_VINFO_LOOP (loop_vinfo);
3014 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3015 containing_loop = (gimple_bb (stmt))->loop_father;
3016 pe = loop_preheader_edge (loop);
3018 else
3020 gcc_assert (bb_vinfo);
3021 only_init = true;
3022 *ptr_incr = NULL;
3025 /* Check the step (evolution) of the load in LOOP, and record
3026 whether it's invariant. */
3027 if (nested_in_vect_loop)
3028 step = STMT_VINFO_DR_STEP (stmt_info);
3029 else
3030 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3032 if (tree_int_cst_compare (step, size_zero_node) == 0)
3033 *inv_p = true;
3034 else
3035 *inv_p = false;
3036 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3038 /* Create an expression for the first address accessed by this load
3039 in LOOP. */
3040 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3042 if (vect_print_dump_info (REPORT_DETAILS))
3044 tree data_ref_base = base_name;
3045 fprintf (vect_dump, "create vector-pointer variable to type: ");
3046 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3047 if (TREE_CODE (data_ref_base) == VAR_DECL
3048 || TREE_CODE (data_ref_base) == ARRAY_REF)
3049 fprintf (vect_dump, " vectorizing an array ref: ");
3050 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3051 fprintf (vect_dump, " vectorizing a record based array ref: ");
3052 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3053 fprintf (vect_dump, " vectorizing a pointer ref: ");
3054 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3057 /* (1) Create the new vector-pointer variable. */
3058 vect_ptr_type = build_pointer_type (vectype);
3059 base = get_base_address (DR_REF (dr));
3060 if (base
3061 && TREE_CODE (base) == MEM_REF)
3062 vect_ptr_type
3063 = build_qualified_type (vect_ptr_type,
3064 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3065 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3066 get_name (base_name));
3068 /* Vector types inherit the alias set of their component type by default, so
3069 we need a ref-all pointer if the alias set of the data reference does not
3070 conflict with that of the new vector pointer (e.g. because it is not addressable). */
3071 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3072 get_alias_set (DR_REF (dr))))
3074 vect_ptr_type
3075 = build_pointer_type_for_mode (vectype,
3076 TYPE_MODE (vect_ptr_type), true);
3077 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3078 get_name (base_name));
3081 /* Likewise for any of the data references in the stmt group. */
3082 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
3084 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
3087 tree lhs = gimple_assign_lhs (orig_stmt);
3088 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3089 get_alias_set (lhs)))
3091 vect_ptr_type
3092 = build_pointer_type_for_mode (vectype,
3093 TYPE_MODE (vect_ptr_type), true);
3094 vect_ptr
3095 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3096 get_name (base_name));
3097 break;
3100 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
3102 while (orig_stmt);
3105 add_referenced_var (vect_ptr);
3107 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3108 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3109 def-use update cycles for the pointer: one relative to the outer-loop
3110 (LOOP), which is what steps (3) and (4) below do. The other is relative
3111 to the inner-loop (which is the inner-most loop containing the dataref),
3112 and this is done by step (5) below.
3114 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3115 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3116 redundant. Steps (3),(4) create the following:
3118 vp0 = &base_addr;
3119 LOOP: vp1 = phi(vp0,vp2)
3122 vp2 = vp1 + step
3123 goto LOOP
3125 If there is an inner-loop nested in loop, then step (5) will also be
3126 applied, and an additional update in the inner-loop will be created:
3128 vp0 = &base_addr;
3129 LOOP: vp1 = phi(vp0,vp2)
3131 inner: vp3 = phi(vp1,vp4)
3132 vp4 = vp3 + inner_step
3133 if () goto inner
3135 vp2 = vp1 + step
3136 if () goto LOOP */
3138 /* (2) Calculate the initial address of the vector-pointer, and set
3139 the vector-pointer to point to it before the loop. */
3141 /* Create: &(base[init_val+offset]) in the loop preheader. */
3143 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3144 offset, loop);
3145 if (new_stmt_list)
3147 if (pe)
3149 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3150 gcc_assert (!new_bb);
3152 else
3153 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
3156 *initial_address = new_temp;
3158 /* Create: p = (vectype *) initial_base */
3159 if (TREE_CODE (new_temp) != SSA_NAME
3160 || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
3162 vec_stmt = gimple_build_assign (vect_ptr,
3163 fold_convert (vect_ptr_type, new_temp));
3164 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
3165 /* Copy the points-to information if it exists. */
3166 if (DR_PTR_INFO (dr))
3167 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
3168 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
3169 if (pe)
3171 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3172 gcc_assert (!new_bb);
3174 else
3175 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
3177 else
3178 vect_ptr_init = new_temp;
3180 /* (3) Handle the updating of the vector-pointer inside the loop.
3181 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3182 inner-loop nested in LOOP (during outer-loop vectorization). */
3184 /* No update in loop is required. */
3185 if (only_init && (!loop_vinfo || at_loop == loop))
3186 vptr = vect_ptr_init;
3187 else
3189 /* The step of the vector pointer is the Vector Size. */
3190 tree step = TYPE_SIZE_UNIT (vectype);
3191 /* One exception to the above is when the scalar step of the load in
3192 LOOP is zero. In this case the step here is also zero. */
3193 if (*inv_p)
3194 step = size_zero_node;
3195 else if (negative)
3196 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
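/* E.g. for a V4SI access that walks backwards through memory, the vector
pointer is bumped by -16 bytes per vector iteration instead of +16. */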
3198 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3200 create_iv (vect_ptr_init,
3201 fold_convert (vect_ptr_type, step),
3202 vect_ptr, loop, &incr_gsi, insert_after,
3203 &indx_before_incr, &indx_after_incr);
3204 incr = gsi_stmt (incr_gsi);
3205 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3207 /* Copy the points-to information if it exists. */
3208 if (DR_PTR_INFO (dr))
3210 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3211 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3213 if (ptr_incr)
3214 *ptr_incr = incr;
3216 vptr = indx_before_incr;
3219 if (!nested_in_vect_loop || only_init)
3220 return vptr;
3223 /* (4) Handle the updating of the vector-pointer inside the inner-loop
3224 nested in LOOP, if it exists. */
3226 gcc_assert (nested_in_vect_loop);
3227 if (!only_init)
3229 standard_iv_increment_position (containing_loop, &incr_gsi,
3230 &insert_after);
3231 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
3232 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3233 &indx_after_incr);
3234 incr = gsi_stmt (incr_gsi);
3235 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3237 /* Copy the points-to information if it exists. */
3238 if (DR_PTR_INFO (dr))
3240 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3241 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3243 if (ptr_incr)
3244 *ptr_incr = incr;
3246 return indx_before_incr;
3248 else
3249 gcc_unreachable ();
3253 /* Function bump_vector_ptr
3255 Increment a pointer (to a vector type) by vector-size. If requested,
3256 i.e. if PTR_INCR is given, then also connect the new increment stmt
3257 to the existing def-use update-chain of the pointer, by modifying
3258 the PTR_INCR as illustrated below:
3260 The pointer def-use update-chain before this function:
3261 DATAREF_PTR = phi (p_0, p_2)
3262 ....
3263 PTR_INCR: p_2 = DATAREF_PTR + step
3265 The pointer def-use update-chain after this function:
3266 DATAREF_PTR = phi (p_0, p_2)
3267 ....
3268 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3269 ....
3270 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3272 Input:
3273 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3274 in the loop.
3275 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3276 the loop. The increment amount across iterations is expected
3277 to be vector_size.
3278 BSI - location where the new update stmt is to be placed.
3279 STMT - the original scalar memory-access stmt that is being vectorized.
3280 BUMP - optional. The offset by which to bump the pointer. If not given,
3281 the offset is assumed to be vector_size.
3283 Output: Return NEW_DATAREF_PTR as illustrated above.
3287 tree
3288 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3289 gimple stmt, tree bump)
3291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3292 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3293 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3294 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3295 tree update = TYPE_SIZE_UNIT (vectype);
3296 gimple incr_stmt;
3297 ssa_op_iter iter;
3298 use_operand_p use_p;
3299 tree new_dataref_ptr;
3301 if (bump)
3302 update = bump;
3304 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3305 dataref_ptr, update);
3306 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3307 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3308 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3310 /* Copy the points-to information if it exists. */
3311 if (DR_PTR_INFO (dr))
3312 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3314 if (!ptr_incr)
3315 return new_dataref_ptr;
3317 /* Update the vector-pointer's cross-iteration increment. */
3318 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3320 tree use = USE_FROM_PTR (use_p);
3322 if (use == dataref_ptr)
3323 SET_USE (use_p, new_dataref_ptr);
3324 else
3325 gcc_assert (tree_int_cst_compare (use, update) == 0);
3328 return new_dataref_ptr;
3332 /* Function vect_create_destination_var.
3334 Create a new temporary of type VECTYPE. */
3336 tree
3337 vect_create_destination_var (tree scalar_dest, tree vectype)
3339 tree vec_dest;
3340 const char *new_name;
3341 tree type;
3342 enum vect_var_kind kind;
3344 kind = vectype ? vect_simple_var : vect_scalar_var;
3345 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3347 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3349 new_name = get_name (scalar_dest);
3350 if (!new_name)
3351 new_name = "var_";
3352 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3353 add_referenced_var (vec_dest);
3355 return vec_dest;
3358 /* Function vect_strided_store_supported.
3360 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3361 and FALSE otherwise. */
3363 bool
3364 vect_strided_store_supported (tree vectype)
3366 optab interleave_high_optab, interleave_low_optab;
3367 enum machine_mode mode;
3369 mode = TYPE_MODE (vectype);
3371 /* Check that the operation is supported. */
3372 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3373 vectype, optab_default);
3374 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3375 vectype, optab_default);
3376 if (!interleave_high_optab || !interleave_low_optab)
3378 if (vect_print_dump_info (REPORT_DETAILS))
3379 fprintf (vect_dump, "no optab for interleave.");
3380 return false;
3383 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3384 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3386 if (vect_print_dump_info (REPORT_DETAILS))
3387 fprintf (vect_dump, "interleave op not supported by target.");
3388 return false;
3391 return true;
3395 /* Function vect_permute_store_chain.
3397 Given a chain of interleaved stores in DR_CHAIN of length LENGTH, which must
3398 be a power of 2, generate interleave_high/low stmts to reorder the data
3399 correctly for the stores. Return the final references for stores in
3400 RESULT_CHAIN.
3402 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3403 The input is 4 vectors each containing 8 elements. We assign a number to
3404 each element, the input sequence is:
3406 1st vec: 0 1 2 3 4 5 6 7
3407 2nd vec: 8 9 10 11 12 13 14 15
3408 3rd vec: 16 17 18 19 20 21 22 23
3409 4th vec: 24 25 26 27 28 29 30 31
3411 The output sequence should be:
3413 1st vec: 0 8 16 24 1 9 17 25
3414 2nd vec: 2 10 18 26 3 11 19 27
3415 3rd vec: 4 12 20 28 5 13 21 29
3416 4th vec: 6 14 22 30 7 15 23 31
3418 i.e., we interleave the contents of the four vectors in their order.
3420 We use interleave_high/low instructions to create such output. The input of
3421 each interleave_high/low operation is two vectors:
3422 1st vec 2nd vec
3423 0 1 2 3 4 5 6 7
3424 the even elements of the result vector are obtained left-to-right from the
3425 high/low elements of the first vector. The odd elements of the result are
3426 obtained left-to-right from the high/low elements of the second vector.
3427 The output of interleave_high will be: 0 4 1 5
3428 and of interleave_low: 2 6 3 7
3431 The permutation is done in log2 (LENGTH) stages. In each stage interleave_high
3432 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3433 where the first argument is taken from the first half of DR_CHAIN and the
3434 second argument from its second half.
3435 In our example,
3437 I1: interleave_high (1st vec, 3rd vec)
3438 I2: interleave_low (1st vec, 3rd vec)
3439 I3: interleave_high (2nd vec, 4th vec)
3440 I4: interleave_low (2nd vec, 4th vec)
3442 The output for the first stage is:
3444 I1: 0 16 1 17 2 18 3 19
3445 I2: 4 20 5 21 6 22 7 23
3446 I3: 8 24 9 25 10 26 11 27
3447 I4: 12 28 13 29 14 30 15 31
3449 The output of the second stage, i.e., the final result, is:
3451 I1: 0 8 16 24 1 9 17 25
3452 I2: 2 10 18 26 3 11 19 27
3453 I3: 4 12 20 28 5 13 21 29
3454 I4: 6 14 22 30 7 15 23 31. */
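/* A quick count for the example above: LENGTH == 4 means log2 (LENGTH) == 2
stages, and each stage emits one interleave_high and one interleave_low stmt
per pair, i.e. LENGTH stmts per stage and 8 permute stmts in all. */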
3456 bool
3457 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3458 unsigned int length,
3459 gimple stmt,
3460 gimple_stmt_iterator *gsi,
3461 VEC(tree,heap) **result_chain)
3463 tree perm_dest, vect1, vect2, high, low;
3464 gimple perm_stmt;
3465 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3466 int i;
3467 unsigned int j;
3468 enum tree_code high_code, low_code;
3470 /* Check that the operation is supported. */
3471 if (!vect_strided_store_supported (vectype))
3472 return false;
3474 *result_chain = VEC_copy (tree, heap, dr_chain);
3476 for (i = 0; i < exact_log2 (length); i++)
3478 for (j = 0; j < length/2; j++)
3480 vect1 = VEC_index (tree, dr_chain, j);
3481 vect2 = VEC_index (tree, dr_chain, j+length/2);
3483 /* Create interleaving stmt:
3484 in the case of big endian:
3485 high = interleave_high (vect1, vect2)
3486 and in the case of little endian:
3487 high = interleave_low (vect1, vect2). */
3488 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3489 DECL_GIMPLE_REG_P (perm_dest) = 1;
3490 add_referenced_var (perm_dest);
3491 if (BYTES_BIG_ENDIAN)
3493 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3494 low_code = VEC_INTERLEAVE_LOW_EXPR;
3496 else
3498 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3499 high_code = VEC_INTERLEAVE_LOW_EXPR;
3501 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3502 vect1, vect2);
3503 high = make_ssa_name (perm_dest, perm_stmt);
3504 gimple_assign_set_lhs (perm_stmt, high);
3505 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3506 VEC_replace (tree, *result_chain, 2*j, high);
3508 /* Create interleaving stmt:
3509 in the case of big endian:
3510 low = interleave_low (vect1, vect2)
3511 and in the case of little endian:
3512 low = interleave_high (vect1, vect2). */
3513 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3514 DECL_GIMPLE_REG_P (perm_dest) = 1;
3515 add_referenced_var (perm_dest);
3516 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3517 vect1, vect2);
3518 low = make_ssa_name (perm_dest, perm_stmt);
3519 gimple_assign_set_lhs (perm_stmt, low);
3520 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3521 VEC_replace (tree, *result_chain, 2*j+1, low);
3523 dr_chain = VEC_copy (tree, heap, *result_chain);
3525 return true;
3528 /* Function vect_setup_realignment
3530 This function is called when vectorizing an unaligned load using
3531 the dr_explicit_realign[_optimized] scheme.
3532 This function generates the following code at the loop prolog:
3534 p = initial_addr;
3535 x msq_init = *(floor(p)); # prolog load
3536 realignment_token = call target_builtin;
3537 loop:
3538 x msq = phi (msq_init, ---)
3540 The stmts marked with x are generated only for the case of
3541 dr_explicit_realign_optimized.
3543 The code above sets up a new (vector) pointer, pointing to the first
3544 location accessed by STMT, and a "floor-aligned" load using that pointer.
3545 It also generates code to compute the "realignment-token" (if the relevant
3546 target hook was defined), and creates a phi-node at the loop-header bb
3547 whose arguments are the result of the prolog-load (created by this
3548 function) and the result of a load that takes place in the loop (to be
3549 created by the caller to this function).
3551 For the case of dr_explicit_realign_optimized:
3552 The caller to this function uses the phi-result (msq) to create the
3553 realignment code inside the loop, and sets up the missing phi argument,
3554 as follows:
3555 loop:
3556 msq = phi (msq_init, lsq)
3557 lsq = *(floor(p')); # load in loop
3558 result = realign_load (msq, lsq, realignment_token);
3560 For the case of dr_explicit_realign:
3561 loop:
3562 msq = *(floor(p)); # load in loop
3563 p' = p + (VS-1);
3564 lsq = *(floor(p')); # load in loop
3565 result = realign_load (msq, lsq, realignment_token);
3567 Input:
3568 STMT - (scalar) load stmt to be vectorized. This load accesses
3569 a memory location that may be unaligned.
3570 BSI - place where new code is to be inserted.
3571 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3572 is used.
3574 Output:
3575 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3576 target hook, if defined.
3577 Return value - the result of the loop-header phi node. */
3579 tree
3580 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3581 tree *realignment_token,
3582 enum dr_alignment_support alignment_support_scheme,
3583 tree init_addr,
3584 struct loop **at_loop)
3586 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3587 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3588 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3589 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3590 struct loop *loop = NULL;
3591 edge pe = NULL;
3592 tree scalar_dest = gimple_assign_lhs (stmt);
3593 tree vec_dest;
3594 gimple inc;
3595 tree ptr;
3596 tree data_ref;
3597 gimple new_stmt;
3598 basic_block new_bb;
3599 tree msq_init = NULL_TREE;
3600 tree new_temp;
3601 gimple phi_stmt;
3602 tree msq = NULL_TREE;
3603 gimple_seq stmts = NULL;
3604 bool inv_p;
3605 bool compute_in_loop = false;
3606 bool nested_in_vect_loop = false;
3607 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3608 struct loop *loop_for_initial_load = NULL;
3610 if (loop_vinfo)
3612 loop = LOOP_VINFO_LOOP (loop_vinfo);
3613 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3616 gcc_assert (alignment_support_scheme == dr_explicit_realign
3617 || alignment_support_scheme == dr_explicit_realign_optimized);
3619 /* We need to generate three things:
3620 1. the misalignment computation
3621 2. the extra vector load (for the optimized realignment scheme).
3622 3. the phi node for the two vectors from which the realignment is
3623 done (for the optimized realignment scheme). */
3625 /* 1. Determine where to generate the misalignment computation.
3627 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3628 calculation will be generated by this function, outside the loop (in the
3629 preheader). Otherwise, INIT_ADDR has already been computed for us by the
3630 caller, inside the loop.
3632 Background: If the misalignment remains fixed throughout the iterations of
3633 the loop, then both realignment schemes are applicable, and also the
3634 misalignment computation can be done outside LOOP. This is because we are
3635 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3636 are a multiple of VS (the Vector Size), and therefore the misalignment in
3637 different vectorized LOOP iterations is always the same.
3638 The problem arises only if the memory access is in an inner-loop nested
3639 inside LOOP, which is now being vectorized using outer-loop vectorization.
3640 This is the only case when the misalignment of the memory access may not
3641 remain fixed throughout the iterations of the inner-loop (as explained in
3642 detail in vect_supportable_dr_alignment). In this case, not only is the
3643 optimized realignment scheme not applicable, but also the misalignment
3644 computation (and generation of the realignment token that is passed to
3645 REALIGN_LOAD) has to be done inside the loop.
3647 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3648 or not, which in turn determines if the misalignment is computed inside
3649 the inner-loop, or outside LOOP. */
3651 if (init_addr != NULL_TREE || !loop_vinfo)
3653 compute_in_loop = true;
3654 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3658 /* 2. Determine where to generate the extra vector load.
3660 For the optimized realignment scheme, instead of generating two vector
3661 loads in each iteration, we generate a single extra vector load in the
3662 preheader of the loop, and in each iteration reuse the result of the
3663 vector load from the previous iteration. In case the memory access is in
3664 an inner-loop nested inside LOOP, which is now being vectorized using
3665 outer-loop vectorization, we need to determine whether this initial vector
3666 load should be generated at the preheader of the inner-loop, or can be
3667 generated at the preheader of LOOP. If the memory access has no evolution
3668 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3669 to be generated inside LOOP (in the preheader of the inner-loop). */
3671 if (nested_in_vect_loop)
3673 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3674 bool invariant_in_outerloop =
3675 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3676 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3678 else
3679 loop_for_initial_load = loop;
3680 if (at_loop)
3681 *at_loop = loop_for_initial_load;
3683 if (loop_for_initial_load)
3684 pe = loop_preheader_edge (loop_for_initial_load);
3686 /* 3. For the case of the optimized realignment, create the first vector
3687 load at the loop preheader. */
3689 if (alignment_support_scheme == dr_explicit_realign_optimized)
3691 /* Create msq_init = *(floor(p1)) in the loop preheader */
3693 gcc_assert (!compute_in_loop);
3694 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3695 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3696 &init_addr, &inc, true, &inv_p);
3697 new_stmt = gimple_build_assign_with_ops
3698 (BIT_AND_EXPR, NULL_TREE, ptr,
3699 build_int_cst (TREE_TYPE (ptr),
3700 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3701 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3702 gimple_assign_set_lhs (new_stmt, new_temp);
3703 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3704 gcc_assert (!new_bb);
3705 data_ref
3706 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3707 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3708 new_stmt = gimple_build_assign (vec_dest, data_ref);
3709 new_temp = make_ssa_name (vec_dest, new_stmt);
3710 gimple_assign_set_lhs (new_stmt, new_temp);
3711 mark_symbols_for_renaming (new_stmt);
3712 if (pe)
3714 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3715 gcc_assert (!new_bb);
3717 else
3718 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3720 msq_init = gimple_assign_lhs (new_stmt);
3723 /* 4. Create realignment token using a target builtin, if available.
3724 It is done either inside the containing loop, or before LOOP (as
3725 determined above). */
3727 if (targetm.vectorize.builtin_mask_for_load)
3729 tree builtin_decl;
3731 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3732 if (!init_addr)
3734 /* Generate the INIT_ADDR computation outside LOOP. */
3735 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3736 NULL_TREE, loop);
3737 if (loop)
3739 pe = loop_preheader_edge (loop);
3740 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3741 gcc_assert (!new_bb);
3743 else
3744 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3747 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3748 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3749 vec_dest =
3750 vect_create_destination_var (scalar_dest,
3751 gimple_call_return_type (new_stmt));
3752 new_temp = make_ssa_name (vec_dest, new_stmt);
3753 gimple_call_set_lhs (new_stmt, new_temp);
3755 if (compute_in_loop)
3756 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3757 else
3759 /* Generate the misalignment computation outside LOOP. */
3760 pe = loop_preheader_edge (loop);
3761 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3762 gcc_assert (!new_bb);
3765 *realignment_token = gimple_call_lhs (new_stmt);
3767 /* The result of the CALL_EXPR to this builtin is determined from
3768 the value of the parameter and no global variables are touched
3769 which makes the builtin a "const" function. Requiring the
3770 builtin to have the "const" attribute makes it unnecessary
3771 to call mark_call_clobbered. */
3772 gcc_assert (TREE_READONLY (builtin_decl));
3775 if (alignment_support_scheme == dr_explicit_realign)
3776 return msq;
3778 gcc_assert (!compute_in_loop);
3779 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3782 /* 5. Create msq = phi <msq_init, lsq> in loop */
3784 pe = loop_preheader_edge (containing_loop);
3785 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3786 msq = make_ssa_name (vec_dest, NULL);
3787 phi_stmt = create_phi_node (msq, containing_loop->header);
3788 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3789 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3791 return msq;
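/* A minimal illustrative sketch (not part of the vectorizer): a scalar
   model of what REALIGN_LOAD <msq, lsq, token> computes in the schemes set
   up above, under the simplifying assumption that the realignment token is
   just the misalignment in elements.  MSQ is the aligned vector covering
   the start of the access, LSQ the next aligned vector; the result is the
   NELT consecutive elements starting OFFSET elements into MSQ.  The actual
   token and element ordering are target-defined; the helper name is
   hypothetical.  */

static void
model_realign_load (const int *msq, const int *lsq, int nelt, int offset,
                    int *result)
{
  int i;
  for (i = 0; i < nelt; i++)
    result[i] = (i + offset < nelt
                 ? msq[i + offset]          /* Elements still inside MSQ.  */
                 : lsq[i + offset - nelt]); /* Spill over into LSQ.  */
}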
3795 /* Function vect_strided_load_supported.
3797 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3798 and FALSE otherwise. */
3800 bool
3801 vect_strided_load_supported (tree vectype)
3803 optab perm_even_optab, perm_odd_optab;
3804 enum machine_mode mode;
3806 mode = TYPE_MODE (vectype);
3808 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3809 optab_default);
3810 if (!perm_even_optab)
3812 if (vect_print_dump_info (REPORT_DETAILS))
3813 fprintf (vect_dump, "no optab for perm_even.");
3814 return false;
3817 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3819 if (vect_print_dump_info (REPORT_DETAILS))
3820 fprintf (vect_dump, "perm_even op not supported by target.");
3821 return false;
3824 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3825 optab_default);
3826 if (!perm_odd_optab)
3828 if (vect_print_dump_info (REPORT_DETAILS))
3829 fprintf (vect_dump, "no optab for perm_odd.");
3830 return false;
3833 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3835 if (vect_print_dump_info (REPORT_DETAILS))
3836 fprintf (vect_dump, "perm_odd op not supported by target.");
3837 return false;
3839 return true;
3843 /* Function vect_permute_load_chain.
3845 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3846 a power of 2, generate extract_even/odd stmts to reorder the input data
3847 correctly. Return the final references for loads in RESULT_CHAIN.
3849 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3850 The input is 4 vectors each containing 8 elements. We assign a number to each
3851 element, the input sequence is:
3853 1st vec: 0 1 2 3 4 5 6 7
3854 2nd vec: 8 9 10 11 12 13 14 15
3855 3rd vec: 16 17 18 19 20 21 22 23
3856 4th vec: 24 25 26 27 28 29 30 31
3858 The output sequence should be:
3860 1st vec: 0 4 8 12 16 20 24 28
3861 2nd vec: 1 5 9 13 17 21 25 29
3862 3rd vec: 2 6 10 14 18 22 26 30
3863 4th vec: 3 7 11 15 19 23 27 31
3865 i.e., the first output vector should contain the first elements of each
3866 interleaving group, etc.
3868 We use extract_even/odd instructions to create such output. The input of
3869 each extract_even/odd operation is two vectors
3870 1st vec 2nd vec
3871 0 1 2 3 4 5 6 7
3873 and the output is the vector of extracted even/odd elements. The output of
3874 extract_even will be: 0 2 4 6
3875 and of extract_odd: 1 3 5 7
3878 The permutation is done in log LENGTH stages. In each stage extract_even
3879 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3880 their order. In our example,
3882 E1: extract_even (1st vec, 2nd vec)
3883 E2: extract_odd (1st vec, 2nd vec)
3884 E3: extract_even (3rd vec, 4th vec)
3885 E4: extract_odd (3rd vec, 4th vec)
3887 The output for the first stage will be:
3889 E1: 0 2 4 6 8 10 12 14
3890 E2: 1 3 5 7 9 11 13 15
3891 E3: 16 18 20 22 24 26 28 30
3892 E4: 17 19 21 23 25 27 29 31
3894 In order to proceed and create the correct sequence for the next stage (or
3895 for the correct output, if the second stage is the last one, as in our
3896 example), we first put the output of extract_even operation and then the
3897 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3898 The input for the second stage is:
3900 1st vec (E1): 0 2 4 6 8 10 12 14
3901 2nd vec (E3): 16 18 20 22 24 26 28 30
3902 3rd vec (E2): 1 3 5 7 9 11 13 15
3903 4th vec (E4): 17 19 21 23 25 27 29 31
3905 The output of the second stage:
3907 E1: 0 4 8 12 16 20 24 28
3908 E2: 2 6 10 14 18 22 26 30
3909 E3: 1 5 9 13 17 21 25 29
3910 E4: 3 7 11 15 19 23 27 31
3912 And RESULT_CHAIN after reordering:
3914 1st vec (E1): 0 4 8 12 16 20 24 28
3915 2nd vec (E3): 1 5 9 13 17 21 25 29
3916 3rd vec (E2): 2 6 10 14 18 22 26 30
3917 4th vec (E4): 3 7 11 15 19 23 27 31. */
3919 bool
3920 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3921 unsigned int length,
3922 gimple stmt,
3923 gimple_stmt_iterator *gsi,
3924 VEC(tree,heap) **result_chain)
3926 tree perm_dest, data_ref, first_vect, second_vect;
3927 gimple perm_stmt;
3928 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3929 int i;
3930 unsigned int j;
3932 /* Check that the operation is supported. */
3933 if (!vect_strided_load_supported (vectype))
3934 return false;
3936 *result_chain = VEC_copy (tree, heap, dr_chain);
3937 for (i = 0; i < exact_log2 (length); i++)
3939 for (j = 0; j < length; j += 2)
3941 first_vect = VEC_index (tree, dr_chain, j);
3942 second_vect = VEC_index (tree, dr_chain, j+1);
3944 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3945 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3946 DECL_GIMPLE_REG_P (perm_dest) = 1;
3947 add_referenced_var (perm_dest);
3949 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3950 perm_dest, first_vect,
3951 second_vect);
3953 data_ref = make_ssa_name (perm_dest, perm_stmt);
3954 gimple_assign_set_lhs (perm_stmt, data_ref);
3955 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3956 mark_symbols_for_renaming (perm_stmt);
3958 VEC_replace (tree, *result_chain, j/2, data_ref);
3960 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3961 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3962 DECL_GIMPLE_REG_P (perm_dest) = 1;
3963 add_referenced_var (perm_dest);
3965 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3966 perm_dest, first_vect,
3967 second_vect);
3968 data_ref = make_ssa_name (perm_dest, perm_stmt);
3969 gimple_assign_set_lhs (perm_stmt, data_ref);
3970 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3971 mark_symbols_for_renaming (perm_stmt);
3973 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3975 dr_chain = VEC_copy (tree, heap, *result_chain);
3977 return true;
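/* A minimal illustrative sketch (not part of the vectorizer): a scalar
   model of one extract_even/extract_odd pair as used above, assuming
   NELT-element vectors of int.  With vect1 = {0,1,2,3} and
   vect2 = {4,5,6,7}, EVEN becomes {0,2,4,6} and ODD becomes {1,3,5,7};
   applying such pairs log2(LENGTH) times, as the loop above does,
   de-interleaves the loaded data.  The helper name is hypothetical.  */

static void
model_extract_even_odd (const int *vect1, const int *vect2, int nelt,
                        int *even, int *odd)
{
  int i;
  for (i = 0; i < nelt / 2; i++)
    {
      /* Even-indexed elements of the concatenation VECT1:VECT2.  */
      even[i] = vect1[2 * i];
      even[i + nelt / 2] = vect2[2 * i];
      /* Odd-indexed elements of the concatenation VECT1:VECT2.  */
      odd[i] = vect1[2 * i + 1];
      odd[i + nelt / 2] = vect2[2 * i + 1];
    }
}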
3981 /* Function vect_transform_strided_load.
3983 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3984 to perform their permutation and assign the resulting vectorized statements to
3985 the scalar statements. */
3988 bool
3989 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3990 gimple_stmt_iterator *gsi)
3992 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3993 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3994 gimple next_stmt, new_stmt;
3995 VEC(tree,heap) *result_chain = NULL;
3996 unsigned int i, gap_count;
3997 tree tmp_data_ref;
3999 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4000 RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
4001 vectors that are ready for vector computation. */
4002 result_chain = VEC_alloc (tree, heap, size);
4003 /* Permute. */
4004 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
4005 return false;
4007 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4008 Since we scan the chain starting from its first node, their order
4009 corresponds to the order of data-refs in RESULT_CHAIN. */
4010 next_stmt = first_stmt;
4011 gap_count = 1;
4012 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4014 if (!next_stmt)
4015 break;
4017 /* Skip the gaps. Loads created for the gaps will be removed by the dead
4018 code elimination pass later. No need to check for the first stmt in
4019 the group, since it always exists.
4020 DR_GROUP_GAP is the number of steps in elements from the previous
4021 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4022 correspond to the gaps. */
4023 if (next_stmt != first_stmt
4024 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4026 gap_count++;
4027 continue;
4030 while (next_stmt)
4032 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4033 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4034 copies, and we put the new vector statement in the first available
4035 RELATED_STMT. */
4036 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4037 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4038 else
4040 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4042 gimple prev_stmt =
4043 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4044 gimple rel_stmt =
4045 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4046 while (rel_stmt)
4048 prev_stmt = rel_stmt;
4049 rel_stmt =
4050 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4053 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4054 new_stmt;
4058 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4059 gap_count = 1;
4060 /* If NEXT_STMT accesses the same DR as the previous statement,
4061 put the same TMP_DATA_REF as its vectorized statement; otherwise
4062 get the next data-ref from RESULT_CHAIN. */
4063 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4064 break;
4068 VEC_free (tree, heap, result_chain);
4069 return true;
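/* A minimal illustrative sketch (not part of the vectorizer): the walk in
   the loop above, which records NEW_STMT either as the first vectorized
   copy of SCALAR_STMT or at the tail of its RELATED_STMT chain, written as
   a stand-alone helper.  The helper name is hypothetical; the real logic
   stays inlined in vect_transform_strided_load.  */

static void
record_vectorized_copy (gimple scalar_stmt, gimple new_stmt)
{
  stmt_vec_info info = vinfo_for_stmt (scalar_stmt);
  gimple prev = STMT_VINFO_VEC_STMT (info);

  if (!prev)
    {
      /* First copy: store it directly in the stmt_vec_info.  */
      STMT_VINFO_VEC_STMT (info) = new_stmt;
      return;
    }

  /* Later copies: walk to the last recorded copy...  */
  while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev)))
    prev = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev));

  /* ...and chain the new one after it.  */
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev)) = new_stmt;
}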
4072 /* Function vect_can_force_dr_alignment_p.
4074 Returns whether the alignment of a DECL can be forced to an ALIGNMENT-bit
4075 boundary. */
4077 bool
4078 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4080 if (TREE_CODE (decl) != VAR_DECL)
4081 return false;
4083 if (DECL_EXTERNAL (decl))
4084 return false;
4086 if (TREE_ASM_WRITTEN (decl))
4087 return false;
4089 if (TREE_STATIC (decl))
4090 return (alignment <= MAX_OFILE_ALIGNMENT);
4091 else
4092 return (alignment <= MAX_STACK_ALIGNMENT);
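/* A minimal illustrative sketch (not part of the vectorizer): how a caller
   might use vect_can_force_dr_alignment_p before actually raising a decl's
   alignment, in the style the vectorizer uses when it forces the alignment
   of a data-ref base.  TARGET_ALIGN is assumed to be the desired alignment
   in bits; the helper name is hypothetical.  */

static void
maybe_force_decl_alignment (tree decl, unsigned int target_align)
{
  if (!vect_can_force_dr_alignment_p (decl, target_align))
    return;

  /* Raise the alignment and mark it as user-specified so later passes
     do not lower it again.  */
  DECL_ALIGN (decl) = target_align;
  DECL_USER_ALIGN (decl) = 1;
}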
4096 /* Return whether the data reference DR is supported with respect to its
4097 alignment.
4098 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4099 if it is aligned, i.e., check if it is possible to vectorize it with different
4100 alignment. */
4102 enum dr_alignment_support
4103 vect_supportable_dr_alignment (struct data_reference *dr,
4104 bool check_aligned_accesses)
4106 gimple stmt = DR_STMT (dr);
4107 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4108 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4109 enum machine_mode mode = TYPE_MODE (vectype);
4110 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4111 struct loop *vect_loop = NULL;
4112 bool nested_in_vect_loop = false;
4114 if (aligned_access_p (dr) && !check_aligned_accesses)
4115 return dr_aligned;
4117 if (loop_vinfo)
4119 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4120 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4123 /* Possibly unaligned access. */
4125 /* We can choose between using the implicit realignment scheme (generating
4126 a misaligned_move stmt) and the explicit realignment scheme (generating
4127 aligned loads with a REALIGN_LOAD). There are two variants to the
4128 explicit realignment scheme: optimized, and unoptimized.
4129 We can optimize the realignment only if the step between consecutive
4130 vector loads is equal to the vector size. Since the vector memory
4131 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4132 is guaranteed that the misalignment amount remains the same throughout the
4133 execution of the vectorized loop. Therefore, we can create the
4134 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4135 at the loop preheader.
4137 However, in the case of outer-loop vectorization, when vectorizing a
4138 memory access in the inner-loop nested within the LOOP that is now being
4139 vectorized, while it is guaranteed that the misalignment of the
4140 vectorized memory access will remain the same in different outer-loop
4141 iterations, it is *not* guaranteed that it will remain the same throughout
4142 the execution of the inner-loop. This is because the inner-loop advances
4143 with the original scalar step (and not in steps of VS). If the inner-loop
4144 step happens to be a multiple of VS, then the misalignment remains fixed
4145 and we can use the optimized realignment scheme. For example:
4147 for (i=0; i<N; i++)
4148 for (j=0; j<M; j++)
4149 s += a[i+j];
4151 When vectorizing the i-loop in the above example, the step between
4152 consecutive vector loads is 1, and so the misalignment does not remain
4153 fixed across the execution of the inner-loop, and the realignment cannot
4154 be optimized (as illustrated in the following pseudo vectorized loop):
4156 for (i=0; i<N; i+=4)
4157 for (j=0; j<M; j++){
4158 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4159 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4160 // (assuming that we start from an aligned address).
4163 We therefore have to use the unoptimized realignment scheme:
4165 for (i=0; i<N; i+=4)
4166 for (j=k; j<M; j+=4)
4167 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4168 // that the misalignment of the initial address is
4169 // 0).
4171 The loop can then be vectorized as follows:
4173 for (k=0; k<4; k++){
4174 rt = get_realignment_token (&vp[k]);
4175 for (i=0; i<N; i+=4){
4176 v1 = vp[i+k];
4177 for (j=k; j<M; j+=4){
4178 v2 = vp[i+j+VS-1];
4179 va = REALIGN_LOAD <v1,v2,rt>;
4180 vs += va;
4181 v1 = v2;
4184 } */
4186 if (DR_IS_READ (dr))
4188 bool is_packed = false;
4189 tree type = (TREE_TYPE (DR_REF (dr)));
4191 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4192 && (!targetm.vectorize.builtin_mask_for_load
4193 || targetm.vectorize.builtin_mask_for_load ()))
4195 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4196 if ((nested_in_vect_loop
4197 && (TREE_INT_CST_LOW (DR_STEP (dr))
4198 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4199 || !loop_vinfo)
4200 return dr_explicit_realign;
4201 else
4202 return dr_explicit_realign_optimized;
4204 if (!known_alignment_for_access_p (dr))
4206 tree ba = DR_BASE_OBJECT (dr);
4208 if (ba)
4209 is_packed = contains_packed_reference (ba);
4212 if (targetm.vectorize.
4213 support_vector_misalignment (mode, type,
4214 DR_MISALIGNMENT (dr), is_packed))
4215 /* Can't software pipeline the loads, but can at least do them. */
4216 return dr_unaligned_supported;
4218 else
4220 bool is_packed = false;
4221 tree type = (TREE_TYPE (DR_REF (dr)));
4223 if (!known_alignment_for_access_p (dr))
4225 tree ba = DR_BASE_OBJECT (dr);
4227 if (ba)
4228 is_packed = contains_packed_reference (ba);
4231 if (targetm.vectorize.
4232 support_vector_misalignment (mode, type,
4233 DR_MISALIGNMENT (dr), is_packed))
4234 return dr_unaligned_supported;
4237 /* Unsupported. */
4238 return dr_unaligned_unsupported;
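/* A minimal illustrative sketch (not part of the vectorizer): one way a
   caller could branch on the result of vect_supportable_dr_alignment when
   deciding how to emit a load.  The real decision logic lives in the
   stmt-vectorization code and is more involved; the helper name is
   hypothetical.  */

static bool
load_needs_explicit_realignment (struct data_reference *dr)
{
  switch (vect_supportable_dr_alignment (dr, false))
    {
    case dr_aligned:
    case dr_unaligned_supported:
      /* A plain (possibly misaligned) vector load is enough.  */
      return false;

    case dr_explicit_realign:
    case dr_explicit_realign_optimized:
      /* The load must go through vect_setup_realignment/REALIGN_LOAD.  */
      return true;

    default:
      /* dr_unaligned_unsupported: the access cannot be vectorized at all;
         callers are expected to have rejected it earlier.  */
      gcc_unreachable ();
    }
}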