recog.c (split_all_insns): Remove dead code.
[official-gcc.git] / gcc / tree-vect-data-refs.c
blobb4da5178be930e6550965c78491ca9b620871d72
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
41 #include "toplev.h"
43 /* Need to include rtl.h, expr.h, etc. for optabs. */
44 #include "expr.h"
45 #include "optabs.h"
47 /* Return the smallest scalar part of STMT.
48 This is used to determine the vectype of the stmt. We generally set the
49 vectype according to the type of the result (lhs). For stmts whose
50 result-type is different than the type of the arguments (e.g., demotion,
51 promotion), vectype will be reset appropriately (later). Note that we have
52 to visit the smallest datatype in this function, because that determines the
53 VF. If the smallest datatype in the loop is present only as the rhs of a
54 promotion operation - we'd miss it.
55 Such a case, where a variable of this datatype does not appear in the lhs
56 anywhere in the loop, can only occur if it's an invariant: e.g.:
57 'int_x = (int) short_inv', which we'd expect to have been optimized away by
58 invariant motion. However, we cannot rely on invariant motion to always
59 take invariants out of the loop, and so in the case of promotion we also
60 have to check the rhs.
61 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
62 types. */
tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  /* Start from the type of the whole expression (normally the lhs type).  */
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  /* For widening operations (casts, widening multiplication, int->float
     conversion) the rhs type may be smaller than the lhs type; the smallest
     type is the one that determines the vectorization factor.  */
  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  /* Report both sizes to the caller.  */
  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
91 /* Find the place of the data-ref in STMT in the interleaving chain that starts
92 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
94 int
95 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
97 gimple next_stmt = first_stmt;
98 int result = 0;
100 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
101 return -1;
103 while (next_stmt && next_stmt != stmt)
105 result++;
106 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
109 if (next_stmt)
110 return result;
111 else
112 return -1;
116 /* Function vect_insert_into_interleaving_chain.
118 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  /* Walk DRB's chain looking for the first element whose init is greater
     than DRA's init, so the chain stays sorted by init.  */
  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list. Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
}
151 /* Function vect_update_interleaving_chain.
153 For two data-refs DRA and DRB that are a part of a chain interleaved data
154 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
156 There are four possible cases:
157 1. New stmts - both DRA and DRB are not a part of any chain:
158 FIRST_DR = DRB
159 NEXT_DR (DRB) = DRA
160 2. DRB is a part of a chain and DRA is not:
161 no need to update FIRST_DR
162 no need to insert DRB
163 insert DRA according to init
164 3. DRA is a part of a chain and DRB is not:
165 if (init of FIRST_DR > init of DRB)
166 FIRST_DR = DRB
167 NEXT(FIRST_DR) = previous FIRST_DR
168 else
169 insert DRB according to its init
170 4. both DRA and DRB are in some interleaving chains:
171 choose the chain with the smallest init of FIRST_DR
172 insert the nodes of the second chain into the first one. */
static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.
     DRB becomes the head (its init is the smaller one by contract).  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                              old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA. Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  /* Already in the same chain - nothing to do.  */
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  /* Merge into whichever chain starts at the smaller init.  */
  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain (don't
         start from the beginning.  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain (don't
         start from the beginning.  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  /* Splice each node of the second chain into the first, keeping the
     result sorted by init; PREV tracks the insertion point so scanning
     resumes where the previous insertion ended.  */
  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list. Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
294 /* Function vect_equal_offsets.
296 Check if OFFSET1 and OFFSET2 are identical expressions. */
298 static bool
299 vect_equal_offsets (tree offset1, tree offset2)
301 bool res;
303 STRIP_NOPS (offset1);
304 STRIP_NOPS (offset2);
306 if (offset1 == offset2)
307 return true;
309 if (TREE_CODE (offset1) != TREE_CODE (offset2)
310 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
311 return false;
313 res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
314 TREE_OPERAND (offset2, 0));
316 if (!res || !BINARY_CLASS_P (offset1))
317 return res;
319 res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
320 TREE_OPERAND (offset2, 1));
322 return res;
326 /* Check dependence between DRA and DRB for basic block vectorization.
327 If the accesses share same bases and offsets, we can compare their initial
328 constant offsets to decide whether they differ or not. In case of a read-
329 write dependence we check that the load is before the store to ensure that
330 vectorization will not change the order of the accesses. */
static bool
vect_drs_dependent_in_basic_block (struct data_reference *dra,
                                   struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* We only call this function for pairs of loads and stores, but we verify
     it here.  Read-read pairs never conflict; write-write pairs are
     conservatively treated as dependent.  */
  if (DR_IS_READ (dra) == DR_IS_READ (drb))
    {
      if (DR_IS_READ (dra))
        return false;
      else
        return true;
    }

  /* Check that the data-refs have same bases and offsets. If not, we can't
     determine if they are dependent.  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
           != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence. Check that the load is before the store.
     When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store. So the order of the accesses is preserved in
     case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}
388 /* Function vect_check_interleaving.
390 Check if DRA and DRB are a part of interleaving. In case they are, insert
391 DRA and DRB in an interleaving chain. */
static bool
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  Note that
     tree_int_cst_compare returning 0 means equal inits, i.e. the same
     location - not an interleaving candidate.  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
           != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return false;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step (if greater than zero) is greater than the difference between
        data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return false;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      /* The gap must fit inside one step, otherwise the refs belong to
         different iterations rather than one interleaved group.  */
      if (step && (init_a - init_b) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if (step && (init_b - init_a) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }

  return false;
}
477 /* Check if data references pointed by DR_I and DR_J are same or
478 belong to same interleaving group. Return FALSE if drs are
479 different, otherwise return TRUE. */
481 static bool
482 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
484 gimple stmt_i = DR_STMT (dr_i);
485 gimple stmt_j = DR_STMT (dr_j);
487 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
488 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
489 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
490 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
491 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
492 return true;
493 else
494 return false;
497 /* If address ranges represented by DDR_I and DDR_J are equal,
498 return TRUE, otherwise return FALSE. */
500 static bool
501 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
503 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
504 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
505 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
506 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
507 return true;
508 else
509 return false;
512 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
513 tested at run-time. Return TRUE if DDR was successfully inserted.
514 Return false if versioning is not supported. */
static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Run-time alias checks may be disabled entirely via --param.  */
  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  /* Versioning duplicates the loop - not acceptable when optimizing for
     size.  */
  if (optimize_loop_nest_for_size_p (loop))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}
552 /* Function vect_analyze_data_ref_dependence.
554 Return TRUE if there (might) exist a dependence between a memory-reference
555 DRA and a memory-reference DRB. When versioning for alias may check a
556 dependence at run-time, return FALSE. Adjust *MAX_VF according to
557 the data dependence. */
static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf,
                                  bool *data_dependence_in_bb)
{
  unsigned int i;
  struct loop *loop = NULL;
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* Don't bother to analyze statements marked as unvectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  They may still form an interleaving
         group, so record that as a side effect.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Read-read pairs (in loops) and self-dependences are harmless.  */
  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "versioning for alias required: "
                       "can't determine dependence between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* Add to list of ddrs that need to be tested at run-time.  */
          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
        }

      /* When vectorizing a basic block unknown dependence can still mean
         strided access.  */
      if (vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* We deal with read-write dependencies in basic blocks later (by
         verifying that all the loads in the basic block are before all the
         stores).  */
      *data_dependence_in_bb = true;
      return false;
    }

  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is unapplicable, hence, in case of known data
     dependence, basic block vectorization is impossible for now.  */
  if (!loop_vinfo)
    {
      if (dra != drb && vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "determined dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* Do not vectorize basic blocks with write-write dependences.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check if this dependence is allowed in basic block vectorization.  */
      return vect_drs_dependent_in_basic_block (dra, drb);
    }

  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      if (dist == 0)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary. We check before that one of the data-refs is store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance negative.");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
                     *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
746 /* Function vect_analyze_data_ref_dependences.
748 Examine all the data references in the loop, and make sure there do not
749 exist any data dependences between them. Set *MAX_VF according to
750 the maximum vectorization factor the data dependences allow. */
752 bool
753 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
754 bb_vec_info bb_vinfo, int *max_vf,
755 bool *data_dependence_in_bb)
757 unsigned int i;
758 VEC (ddr_p, heap) *ddrs = NULL;
759 struct data_dependence_relation *ddr;
761 if (vect_print_dump_info (REPORT_DETAILS))
762 fprintf (vect_dump, "=== vect_analyze_dependences ===");
764 if (loop_vinfo)
765 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
766 else
767 ddrs = BB_VINFO_DDRS (bb_vinfo);
769 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
770 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
771 data_dependence_in_bb))
772 return false;
774 return true;
778 /* Function vect_compute_data_ref_alignment
780 Compute the misalignment of the data reference DR.
782 Output:
783 1. If during the misalignment computation it is found that the data reference
784 cannot be vectorized then false is returned.
785 2. DR_MISALIGNMENT (DR) is defined.
787 FOR NOW: No analysis is actually performed. Misalignment is calculated
788 only for trivial cases. TODO. */
static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP). This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          /* Misalignment varies across inner-loop iterations - give up.  */
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  /* Known-alignment info weaker than what we need, or no constant
     misalignment: leave DR_MISALIGNMENT at -1 (unknown) but succeed.  */
  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                      TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "force alignment of ");
          print_generic_expr (vect_dump, ref, TDF_SLIM);
        }

      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
         otherwise we wouldn't be here.  */
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
      /* PLUS because DR_STEP was negative.  */
      misalign = size_binop (PLUS_EXPR, misalign, offset);
    }

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
939 /* Function vect_compute_data_refs_alignment
941 Compute the misalignment of data references in the loop.
942 Return FALSE if a data reference is found that cannot be vectorized. */
static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  unsigned int i;

  /* Select the data-refs of the loop or the basic block being analyzed.  */
  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          {
            /* Mark unsupported statement as unvectorizable.  For basic block
               vectorization we can skip the offending statement and keep
               going; for loops one bad ref fails the whole analysis.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
            continue;
          }
        else
          return false;
      }

  return true;
}
975 /* Function vect_update_misalignment_for_peel
977 DR - the data reference whose misalignment is to be adjusted.
978 DR_PEEL - the data reference whose misalignment is being made
979 zero in the vector loop by the peel.
980 NPEEL - the number of iterations in the peel loop if the misalignment
981 of DR_PEEL is known at compile time. */
static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      /* DR moves in lockstep with DR_PEEL, so peeling aligns it too.  */
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  /* Both misalignments known at compile time: shift DR's misalignment by
     the NPEEL peeled iterations and reduce modulo the vector size.  */
  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  /* Otherwise the post-peeling misalignment is unknown.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
1034 /* Function vect_verify_datarefs_alignment
1036 Return TRUE if all data references in the loop can be
1037 handled with respect to alignment. */
1039 bool
1040 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1042 VEC (data_reference_p, heap) *datarefs;
1043 struct data_reference *dr;
1044 enum dr_alignment_support supportable_dr_alignment;
1045 unsigned int i;
1047 if (loop_vinfo)
1048 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1049 else
1050 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1052 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1054 gimple stmt = DR_STMT (dr);
1055 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1057 /* For interleaving, only the alignment of the first access matters.
1058 Skip statements marked as not vectorizable. */
1059 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1060 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1061 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1062 continue;
1064 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1065 if (!supportable_dr_alignment)
1067 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1069 if (DR_IS_READ (dr))
1070 fprintf (vect_dump,
1071 "not vectorized: unsupported unaligned load.");
1072 else
1073 fprintf (vect_dump,
1074 "not vectorized: unsupported unaligned store.");
1076 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1078 return false;
1080 if (supportable_dr_alignment != dr_aligned
1081 && vect_print_dump_info (REPORT_ALIGNMENT))
1082 fprintf (vect_dump, "Vectorizing an unaligned access.");
1084 return true;
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
         the prolog loop ({VF - misalignment}), is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      /* Convert the byte misalignment into a count of elements.  */
      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      /* Peeling a number of iterations that is not a multiple of the
         group size would break up the interleaving groups.  */
      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      /* If the element size does not divide the misalignment, no number
         of whole-element peels can reach alignment.  */
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      /* Unknown misalignment: let the target decide whether peeling for
         natural alignment can reach vector alignment, taking packed
         references into account.  */
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
1159 /* Calculate the cost of the memory access represented by DR. */
1161 static void
1162 vect_get_data_access_cost (struct data_reference *dr,
1163 unsigned int *inside_cost,
1164 unsigned int *outside_cost)
1166 gimple stmt = DR_STMT (dr);
1167 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1168 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1169 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1170 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1171 int ncopies = vf / nunits;
1172 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1174 if (!supportable_dr_alignment)
1175 *inside_cost = VECT_MAX_COST;
1176 else
1178 if (DR_IS_READ (dr))
1179 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1180 else
1181 vect_get_store_cost (dr, ncopies, inside_cost);
1184 if (vect_print_dump_info (REPORT_COST))
1185 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1186 "outside_cost = %d.", *inside_cost, *outside_cost);
1190 static hashval_t
1191 vect_peeling_hash (const void *elem)
1193 const struct _vect_peel_info *peel_info;
1195 peel_info = (const struct _vect_peel_info *) elem;
1196 return (hashval_t) peel_info->npeel;
1200 static int
1201 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1203 const struct _vect_peel_info *a, *b;
1205 a = (const struct _vect_peel_info *) elem1;
1206 b = (const struct _vect_peel_info *) elem2;
1207 return (a->npeel == b->npeel);
1211 /* Insert DR into peeling hash table with NPEEL as key. */
1213 static void
1214 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1215 int npeel)
1217 struct _vect_peel_info elem, *slot;
1218 void **new_slot;
1219 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1221 elem.npeel = npeel;
1222 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1223 &elem);
1224 if (slot)
1225 slot->count++;
1226 else
1228 slot = XNEW (struct _vect_peel_info);
1229 slot->npeel = npeel;
1230 slot->dr = dr;
1231 slot->count = 1;
1232 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1233 INSERT);
1234 *new_slot = slot;
1237 if (!supportable_dr_alignment && !flag_vect_cost_model)
1238 slot->count += VECT_MAX_COST;
1242 /* Traverse peeling hash table to find peeling option that aligns maximum
1243 number of data accesses. */
1245 static int
1246 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1248 vect_peel_info elem = (vect_peel_info) *slot;
1249 vect_peel_extended_info max = (vect_peel_extended_info) data;
1251 if (elem->count > max->peel_info.count)
1253 max->peel_info.npeel = elem->npeel;
1254 max->peel_info.count = elem->count;
1255 max->peel_info.dr = elem->dr;
1258 return 1;
/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  Callback for htab_traverse; returns
   1 so every entry is visited.  DATA points to the running minimum.  */

static int
vect_peeling_hash_get_lowest_cost (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info min = (vect_peel_extended_info) data;
  int save_misalignment, dummy;
  unsigned int inside_cost = 0, outside_cost = 0, i;
  gimple stmt = DR_STMT (elem->dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);
      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      /* Temporarily pretend this peeling was applied, evaluate the access
         cost under the resulting misalignment, then restore the saved
         value so the table state is unchanged.  */
      save_misalignment = DR_MISALIGNMENT (dr);
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }

  /* Account for the prologue/epilogue cost of peeling ELEM->npeel
     iterations.  (NOTE: the callee's name contains a historical typo,
     "iteraion"; it must match the definition elsewhere.)  */
  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
                         vect_get_single_scalar_iteraion_cost (loop_vinfo));

  /* Prefer lower inside cost; break ties on outside cost.  */
  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr = elem->dr;
      min->peel_info.npeel = elem->npeel;
    }

  return 1;
}
1310 /* Choose best peeling option by traversing peeling hash table and either
1311 choosing an option with the lowest cost (if cost model is enabled) or the
1312 option that aligns as many accesses as possible. */
1314 static struct data_reference *
1315 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1316 unsigned int *npeel)
1318 struct _vect_peel_extended_info res;
1320 res.peel_info.dr = NULL;
1322 if (flag_vect_cost_model)
1324 res.inside_cost = INT_MAX;
1325 res.outside_cost = INT_MAX;
1326 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1327 vect_peeling_hash_get_lowest_cost, &res);
1329 else
1331 res.peel_info.count = 0;
1332 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1333 vect_peeling_hash_get_most_frequent, &res);
1336 *npeel = res.peel_info.npeel;
1337 return res.peel_info.dr;
1341 /* Function vect_enhance_data_refs_alignment
1343 This pass will use loop versioning and loop peeling in order to enhance
1344 the alignment of data references in the loop.
1346 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1347 original loop is to be vectorized. Any other loops that are created by
1348 the transformations performed in this pass - are not supposed to be
1349 vectorized. This restriction will be relaxed.
1351 This pass will require a cost model to guide it whether to apply peeling
1352 or versioning or a combination of the two. For example, the scheme that
1353 intel uses when given a loop with several memory accesses, is as follows:
1354 choose one memory access ('p') which alignment you want to force by doing
1355 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1356 other accesses are not necessarily aligned, or (2) use loop versioning to
1357 generate one loop in which all accesses are aligned, and another loop in
1358 which only 'p' is necessarily aligned.
1360 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1361 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1362 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1364 Devising a cost model is the most critical aspect of this work. It will
1365 guide us on which access to peel for, whether to use loop versioning, how
1366 many versions to create, etc. The cost model will probably consist of
1367 generic considerations as well as target specific considerations (on
1368 powerpc for example, misaligned stores are more painful than misaligned
1369 loads).
1371 Here are the general steps involved in alignment enhancements:
1373 -- original loop, before alignment analysis:
1374 for (i=0; i<N; i++){
1375 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1376 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1379 -- After vect_compute_data_refs_alignment:
1380 for (i=0; i<N; i++){
1381 x = q[i]; # DR_MISALIGNMENT(q) = 3
1382 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1385 -- Possibility 1: we do loop versioning:
1386 if (p is aligned) {
1387 for (i=0; i<N; i++){ # loop 1A
1388 x = q[i]; # DR_MISALIGNMENT(q) = 3
1389 p[i] = y; # DR_MISALIGNMENT(p) = 0
1392 else {
1393 for (i=0; i<N; i++){ # loop 1B
1394 x = q[i]; # DR_MISALIGNMENT(q) = 3
1395 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1399 -- Possibility 2: we do loop peeling:
1400 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1401 x = q[i];
1402 p[i] = y;
1404 for (i = 3; i < N; i++){ # loop 2A
1405 x = q[i]; # DR_MISALIGNMENT(q) = 0
1406 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1409 -- Possibility 3: combination of loop peeling and versioning:
1410 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1411 x = q[i];
1412 p[i] = y;
1414 if (p is aligned) {
1415 for (i = 3; i<N; i++){ # loop 3A
1416 x = q[i]; # DR_MISALIGNMENT(q) = 0
1417 p[i] = y; # DR_MISALIGNMENT(p) = 0
1420 else {
1421 for (i = 3; i<N; i++){ # loop 3B
1422 x = q[i]; # DR_MISALIGNMENT(q) = 0
1423 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1427 These loops are later passed to loop_transform to be vectorized. The
1428 vectorizer will use the alignment information to guide the transformation
1429 (whether to generate regular loads/stores, or with special handling for
1430 misalignment). */
bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL, *first_store = NULL;
  struct data_reference *dr;
  unsigned int i, j;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;
  unsigned int npeel = 0;
  bool all_misalignments_unknown = true;
  unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned possible_npeel_number = 1;
  tree vectype;
  unsigned int nelements, mis, same_align_drs_max = 0;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned access then see if peeling to align
        this access can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).  */

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
      do_peeling = vector_alignment_reachable_p (dr);
      if (do_peeling)
        {
          if (known_alignment_for_access_p (dr))
            {
              unsigned int npeel_tmp;
              bool negative = tree_int_cst_compare (DR_STEP (dr),
                                                    size_zero_node) < 0;

              /* Save info about DR in the hash table.  */
              if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
                LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
                  htab_create (1, vect_peeling_hash,
                               vect_peeling_hash_eq, free);

              /* Number of iterations needed to align this DR, as an
                 element count modulo the vectorization factor.  */
              vectype = STMT_VINFO_VECTYPE (stmt_info);
              nelements = TYPE_VECTOR_SUBPARTS (vectype);
              mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
                                                TREE_TYPE (DR_REF (dr))));
              npeel_tmp = (negative
                           ? (mis - nelements) : (nelements - mis)) & (vf - 1);

              /* For multiple types, it is possible that the bigger type access
                 will have more than one peeling option.  E.g., a loop with two
                 types: one of size (vector size / 4), and the other one of
                 size (vector size / 8).  Vectorization factor will be 8.  If
                 both accesses are misaligned by 3, the first one needs one
                 scalar iteration to be aligned, and the second one needs 5.
                 But the first one will be aligned also by peeling 5 scalar
                 iterations, and in that case both accesses will be aligned.
                 Hence, except for the immediate peeling amount, we also want
                 to try to add full vector size, while we don't exceed
                 vectorization factor.
                 We do this automatically for cost model, since we calculate
                 cost for every peeling option.  */
              if (!flag_vect_cost_model)
                possible_npeel_number = vf / nelements;

              /* Handle the aligned case.  We may decide to align some other
                 access, making DR unaligned.  */
              if (DR_MISALIGNMENT (dr) == 0)
                {
                  npeel_tmp = 0;
                  if (!flag_vect_cost_model)
                    possible_npeel_number++;
                }

              for (j = 0; j < possible_npeel_number; j++)
                {
                  gcc_assert (npeel_tmp <= vf);
                  vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
                  npeel_tmp += nelements;
                }

              all_misalignments_unknown = false;
              /* Data-ref that was chosen for the case that all the
                 misalignments are unknown is not relevant anymore, since we
                 have a data-ref with known alignment.  */
              dr0 = NULL;
            }
          else
            {
              /* If we don't know all the misalignment values, we prefer
                 peeling for data-ref that has maximum number of data-refs
                 with the same alignment, unless the target prefers to align
                 stores over loads.  */
              if (all_misalignments_unknown)
                {
                  if (same_align_drs_max < VEC_length (dr_p,
                                     STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
                      || !dr0)
                    {
                      same_align_drs_max = VEC_length (dr_p,
                                     STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
                      dr0 = dr;
                    }

                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }

              /* If there are both known and unknown misaligned accesses in the
                 loop, we choose peeling amount according to the known
                 accesses.  */

              if (!supportable_dr_alignment)
                {
                  dr0 = dr;
                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }
            }
        }
      else
        {
          /* Peeling cannot reach alignment for DR; give up on peeling
             altogether unless DR happens to be aligned already.  */
          if (!aligned_access_p (dr))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector alignment may not be reachable");

              break;
            }
        }
    }

  vect_versioning_for_alias_required
    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);

  /* Temporarily, if versioning for alias is required, we disable peeling
     until we support peeling and versioning.  Often peeling for alignment
     will require peeling for loop-bound, which in turn requires that we
     know how to adjust the loop ivs after the loop.  */
  if (vect_versioning_for_alias_required
      || !vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
    do_peeling = false;

  if (do_peeling && all_misalignments_unknown
      && vect_supportable_dr_alignment (dr0, false))
    {

      /* Check if the target requires to prefer stores over loads, i.e., if
         misaligned stores are more expensive than misaligned loads (taking
         drs with same alignment into account).  */
      if (first_store && DR_IS_READ (dr0))
        {
          unsigned int load_inside_cost = 0, load_outside_cost = 0;
          unsigned int store_inside_cost = 0, store_outside_cost = 0;
          unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
          unsigned int store_inside_penalty = 0, store_outside_penalty = 0;

          vect_get_data_access_cost (dr0, &load_inside_cost,
                                     &load_outside_cost);
          vect_get_data_access_cost (first_store, &store_inside_cost,
                                     &store_outside_cost);

          /* Calculate the penalty for leaving FIRST_STORE unaligned (by
             aligning the load DR0).  */
          load_inside_penalty = store_inside_cost;
          load_outside_penalty = store_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (first_store))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                load_inside_penalty += load_inside_cost;
                load_outside_penalty += load_outside_cost;
              }
            else
              {
                load_inside_penalty += store_inside_cost;
                load_outside_penalty += store_outside_cost;
              }

          /* Calculate the penalty for leaving DR0 unaligned (by
             aligning the FIRST_STORE).  */
          store_inside_penalty = load_inside_cost;
          store_outside_penalty = load_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (dr0))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                store_inside_penalty += load_inside_cost;
                store_outside_penalty += load_outside_cost;
              }
            else
              {
                store_inside_penalty += store_inside_cost;
                store_outside_penalty += store_outside_cost;
              }

          if (load_inside_penalty > store_inside_penalty
              || (load_inside_penalty == store_inside_penalty
                  && load_outside_penalty > store_outside_penalty))
            dr0 = first_store;
        }

      /* In case there are only loads with different unknown misalignments, use
         peeling only if it may help to align other accesses in the loop.  */
      if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                       (vinfo_for_stmt (DR_STMT (dr0))))
          && vect_supportable_dr_alignment (dr0, false)
             != dr_unaligned_supported)
        do_peeling = false;
    }

  if (do_peeling && !dr0)
    {
      /* Peeling is possible, but there is no data access that is not supported
         unless aligned.  So we try to choose the best possible peeling.  */

      /* We should get here only if there are drs with known misalignment.  */
      gcc_assert (!all_misalignments_unknown);

      /* Choose the best peeling from the hash table.  */
      dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
      if (!dr0 || !npeel)
        do_peeling = false;
    }

  if (do_peeling)
    {
      stmt = DR_STMT (dr0);
      stmt_info = vinfo_for_stmt (stmt);
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      nelements = TYPE_VECTOR_SUBPARTS (vectype);

      if (known_alignment_for_access_p (dr0))
        {
          bool negative = tree_int_cst_compare (DR_STEP (dr0),
                                                size_zero_node) < 0;
          if (!npeel)
            {
              /* Since it's known at compile time, compute the number of
                 iterations in the peeled loop (the peeling factor) for use in
                 updating DR_MISALIGNMENT values.  The peeling factor is the
                 vectorization factor minus the misalignment as an element
                 count.  */
              mis = DR_MISALIGNMENT (dr0);
              mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
              npeel = (negative ? mis - nelements : nelements - mis) & (vf - 1);
            }

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of iterations
             by the group size.  */
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
            npeel /= DR_GROUP_SIZE (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Try peeling by %d", npeel);
        }

      /* Ensure that all data refs can be vectorized after the peel.  */
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
              && DR_GROUP_FIRST_DR (stmt_info) != stmt)
            continue;

          /* Tentatively apply the peel to DR, query supportability, then
             restore the original misalignment.  */
          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
          SET_DR_MISALIGNMENT (dr, save_misalignment);

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
        {
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          if (!stat)
            do_peeling = false;
          else
            return stat;
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          if (npeel)
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
          else
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          SET_DR_MISALIGNMENT (dr0, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          gcc_assert (stat);
          return stat;
        }
    }

  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize loop for speed
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning =
        flag_tree_vect_loop_version
        && optimize_loop_nest_for_speed_p (loop)
        && (!loop->inner); /* FORNOW */

  if (do_versioning)
    {
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);

          /* For interleaving, only the alignment of the first access
             matters.  */
          if (aligned_access_p (dr)
              || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);

          if (!supportable_dr_alignment)
            {
              gimple stmt;
              int mask;
              tree vectype;

              /* Versioning only helps for accesses whose misalignment is
                 unknown, and only while the number of runtime checks stays
                 within the parameter limit.  */
              if (known_alignment_for_access_p (dr)
                  || VEC_length (gimple,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (gimple, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(gimple,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      gimple stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          SET_DR_MISALIGNMENT (dr, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
  return stat;
}
/* Function vect_find_same_alignment_drs.

   Update group and alignment relations according to the chosen
   vectorization factor.  Records DRA/DRB in each other's
   SAME_ALIGN_REFS list when their dependence distance guarantees
   identical alignment in the vector loop.  */

static void
vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
                              loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* Known to be independent: no alignment relation to record.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return;

  /* A reference trivially has the same alignment as itself.  */
  if (dra == drb)
    return;

  /* Unknown dependence: cannot conclude anything.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return;

  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    return;

  /* Data-dependence analysis reports a distance vector of zero
     for data-references that overlap only in the first iteration
     but have different sign step (see PR45764).
     So as a sanity check require equal DR_STEP.  */
  if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
    return;

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration, or a distance that is a multiple of the
         vectorization factor between same-size accesses.  */
      if (dist == 0
          || (dist % vectorization_factor == 0 && dra_size == drb_size))
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
        }
    }
}
1992 /* Function vect_analyze_data_refs_alignment
1994 Analyze the alignment of the data-references in the loop.
1995 Return FALSE if a data reference is found that cannot be vectorized. */
1997 bool
1998 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1999 bb_vec_info bb_vinfo)
2001 if (vect_print_dump_info (REPORT_DETAILS))
2002 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2004 /* Mark groups of data references with same alignment using
2005 data dependence information. */
2006 if (loop_vinfo)
2008 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2009 struct data_dependence_relation *ddr;
2010 unsigned int i;
2012 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2013 vect_find_same_alignment_drs (ddr, loop_vinfo);
2016 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2018 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2019 fprintf (vect_dump,
2020 "not vectorized: can't calculate alignment for data ref.");
2021 return false;
2024 return true;
2028 /* Analyze groups of strided accesses: check that DR belongs to a group of
2029 strided accesses of legal size, step, etc. Detect gaps, single element
2030 interleaving, and other special cases. Set strided access info.
2031 Collect groups of strided stores for further use in SLP analysis. */
/* Returns true if DR's access pattern is supported (possibly marking the
   containing stmt unvectorizable in the basic-block case), false if the
   pattern rules out vectorization of the loop.  The DR_GROUP_* fields of
   the stmts' vinfo link the members of an interleaving chain.  */
2033 static bool
2034 vect_analyze_group_access (struct data_reference *dr)
2036 tree step = DR_STEP (dr);
2037 tree scalar_type = TREE_TYPE (DR_REF (dr));
2038 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2039 gimple stmt = DR_STMT (dr);
2040 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2041 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2042 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2043 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2044 HOST_WIDE_INT stride;
2045 bool slp_impossible = false;
2047 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2048 interleaving group (including gaps). */
2049 stride = dr_step / type_size;
2051 /* Not consecutive access is possible only if it is a part of interleaving. */
2052 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2054 /* Check if it this DR is a part of interleaving, and is a single
2055 element of the group that is accessed in the loop. */
2057 /* Gaps are supported only for loads. STEP must be a multiple of the type
2058 size. The size of the group must be a power of 2. */
2059 if (DR_IS_READ (dr)
2060 && (dr_step % type_size) == 0
2061 && stride > 0
2062 && exact_log2 (stride) != -1)
/* Treat the stmt as its own group leader with group size STRIDE.  */
2064 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2065 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2066 if (vect_print_dump_info (REPORT_DR_DETAILS))
2068 fprintf (vect_dump, "Detected single element interleaving ");
2069 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2070 fprintf (vect_dump, " step ");
2071 print_generic_expr (vect_dump, step, TDF_SLIM);
2073 return true;
2076 if (vect_print_dump_info (REPORT_DETAILS))
2078 fprintf (vect_dump, "not consecutive access ");
2079 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* In basic-block vectorization a bad access only disables its own
   statement, so return true to let the analysis continue.  */
2082 if (bb_vinfo)
2084 /* Mark the statement as unvectorizable. */
2085 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2086 return true;
2089 return false;
2092 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2094 /* First stmt in the interleaving chain. Check the chain. */
2095 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2096 struct data_reference *data_ref = dr;
2097 unsigned int count = 1;
2098 tree next_step;
2099 tree prev_init = DR_INIT (data_ref);
2100 gimple prev = stmt;
2101 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
/* Walk the DR_GROUP_NEXT_DR chain, validating each member.  */
2103 while (next)
2105 /* Skip same data-refs. In case that two or more stmts share
2106 data-ref (supported only for loads), we vectorize only the first
2107 stmt, and the rest get their vectorized loads from the first
2108 one. */
2109 if (!tree_int_cst_compare (DR_INIT (data_ref),
2110 DR_INIT (STMT_VINFO_DATA_REF (
2111 vinfo_for_stmt (next)))))
2113 if (DR_IS_WRITE (data_ref))
2115 if (vect_print_dump_info (REPORT_DETAILS))
2116 fprintf (vect_dump, "Two store stmts share the same dr.");
2117 return false;
2120 /* Check that there is no load-store dependencies for this loads
2121 to prevent a case of load-store-load to the same location. */
2122 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2123 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2125 if (vect_print_dump_info (REPORT_DETAILS))
2126 fprintf (vect_dump,
2127 "READ_WRITE dependence in interleaving.");
2128 return false;
2131 /* For load use the same data-ref load. */
2132 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2134 prev = next;
2135 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2136 continue;
/* NEXT is a genuinely distinct access; it becomes the previous
   member for the following iteration.  */
2138 prev = next;
2140 /* Check that all the accesses have the same STEP. */
2141 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2142 if (tree_int_cst_compare (step, next_step))
2144 if (vect_print_dump_info (REPORT_DETAILS))
2145 fprintf (vect_dump, "not consecutive access in interleaving");
2146 return false;
2149 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2150 /* Check that the distance between two accesses is equal to the type
2151 size. Otherwise, we have gaps. */
2152 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2153 - TREE_INT_CST_LOW (prev_init)) / type_size;
2154 if (diff != 1)
2156 /* FORNOW: SLP of accesses with gaps is not supported. */
2157 slp_impossible = true;
2158 if (DR_IS_WRITE (data_ref))
2160 if (vect_print_dump_info (REPORT_DETAILS))
2161 fprintf (vect_dump, "interleaved store with gaps");
2162 return false;
/* Accumulate the total number of skipped elements in the group.  */
2165 gaps += diff - 1;
2168 /* Store the gap from the previous member of the group. If there is no
2169 gap in the access, DR_GROUP_GAP is always 1. */
2170 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2172 prev_init = DR_INIT (data_ref);
2173 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2174 /* Count the number of data-refs in the chain. */
2175 count++;
2178 /* COUNT is the number of accesses found, we multiply it by the size of
2179 the type to get COUNT_IN_BYTES. */
2180 count_in_bytes = type_size * count;
2182 /* Check that the size of the interleaving (including gaps) is not
2183 greater than STEP. */
2184 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2186 if (vect_print_dump_info (REPORT_DETAILS))
2188 fprintf (vect_dump, "interleaving size is greater than step for ");
2189 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2191 return false;
2194 /* Check that the size of the interleaving is equal to STEP for stores,
2195 i.e., that there are no gaps. */
2196 if (dr_step && dr_step != count_in_bytes)
2198 if (DR_IS_READ (dr))
2200 slp_impossible = true;
2201 /* There is a gap after the last load in the group. This gap is a
2202 difference between the stride and the number of elements. When
2203 there is no gap, this difference should be 0. */
2204 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2206 else
2208 if (vect_print_dump_info (REPORT_DETAILS))
2209 fprintf (vect_dump, "interleaved store with gaps");
2210 return false;
2214 /* Check that STEP is a multiple of type size. */
2215 if (dr_step && (dr_step % type_size) != 0)
2217 if (vect_print_dump_info (REPORT_DETAILS))
2219 fprintf (vect_dump, "step is not a multiple of type size: step ");
2220 print_generic_expr (vect_dump, step, TDF_SLIM);
2221 fprintf (vect_dump, " size ");
2222 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2223 TDF_SLIM);
2225 return false;
2228 /* FORNOW: we handle only interleaving that is a power of 2.
2229 We don't fail here if it may be still possible to vectorize the
2230 group using SLP. If not, the size of the group will be checked in
2231 vect_analyze_operations, and the vectorization will fail. */
2232 if (exact_log2 (stride) == -1)
2234 if (vect_print_dump_info (REPORT_DETAILS))
2235 fprintf (vect_dump, "interleaving is not a power of 2");
2237 if (slp_impossible)
2238 return false;
/* A zero DR_STEP yields STRIDE == 0; fall back to the number of chain
   members as the group size.  */
2241 if (stride == 0)
2242 stride = count;
2244 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2245 if (vect_print_dump_info (REPORT_DETAILS))
2246 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2248 /* SLP: create an SLP data structure for every interleaving group of
2249 stores for further analysis in vect_analyse_slp. */
2250 if (DR_IS_WRITE (dr) && !slp_impossible)
2252 if (loop_vinfo)
2253 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2254 stmt);
2255 if (bb_vinfo)
2256 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2257 stmt);
2261 return true;
2265 /* Analyze the access pattern of the data-reference DR.
2266 In case of non-consecutive accesses call vect_analyze_group_access() to
2267 analyze groups of strided accesses. */
2269 static bool
2270 vect_analyze_data_ref_access (struct data_reference *dr)
2272 tree step = DR_STEP (dr);
2273 tree scalar_type = TREE_TYPE (DR_REF (dr));
2274 gimple stmt = DR_STMT (dr);
2275 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2276 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2277 struct loop *loop = NULL;
2278 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2280 if (loop_vinfo)
2281 loop = LOOP_VINFO_LOOP (loop_vinfo);
2283 if (loop_vinfo && !step)
2285 if (vect_print_dump_info (REPORT_DETAILS))
2286 fprintf (vect_dump, "bad data-ref access in loop");
2287 return false;
2290 /* Don't allow invariant accesses in loops. */
2291 if (loop_vinfo && dr_step == 0)
2292 return false;
2294 if (loop && nested_in_vect_loop_p (loop, stmt))
2296 /* Interleaved accesses are not yet supported within outer-loop
2297 vectorization for references in the inner-loop. */
2298 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2300 /* For the rest of the analysis we use the outer-loop step. */
2301 step = STMT_VINFO_DR_STEP (stmt_info);
2302 dr_step = TREE_INT_CST_LOW (step);
2304 if (dr_step == 0)
2306 if (vect_print_dump_info (REPORT_ALIGNMENT))
2307 fprintf (vect_dump, "zero step in outer loop.");
2308 if (DR_IS_READ (dr))
2309 return true;
2310 else
2311 return false;
2315 /* Consecutive? */
2316 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2317 || (dr_step < 0
2318 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2320 /* Mark that it is not interleaving. */
2321 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2322 return true;
2325 if (loop && nested_in_vect_loop_p (loop, stmt))
2327 if (vect_print_dump_info (REPORT_ALIGNMENT))
2328 fprintf (vect_dump, "strided access in outer loop.");
2329 return false;
2332 /* Not consecutive access - check if it's a part of interleaving group. */
2333 return vect_analyze_group_access (dr);
2337 /* Function vect_analyze_data_ref_accesses.
2339 Analyze the access pattern of all the data references in the loop.
2341 FORNOW: the only access pattern that is considered vectorizable is a
2342 simple step 1 (consecutive) access.
2344 FORNOW: handle only arrays and pointer accesses. */
2346 bool
2347 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2349 unsigned int i;
2350 VEC (data_reference_p, heap) *datarefs;
2351 struct data_reference *dr;
2353 if (vect_print_dump_info (REPORT_DETAILS))
2354 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2356 if (loop_vinfo)
2357 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2358 else
2359 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2361 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2362 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2363 && !vect_analyze_data_ref_access (dr))
2365 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2366 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2368 if (bb_vinfo)
2370 /* Mark the statement as not vectorizable. */
2371 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2372 continue;
2374 else
2375 return false;
2378 return true;
2381 /* Function vect_prune_runtime_alias_test_list.
2383 Prune a list of ddrs to be tested at run-time by versioning for alias.
2384 Return FALSE if resulting list of ddrs is longer then allowed by
2385 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2387 bool
2388 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2390 VEC (ddr_p, heap) * ddrs =
2391 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2392 unsigned i, j;
2394 if (vect_print_dump_info (REPORT_DETAILS))
2395 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2397 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2399 bool found;
2400 ddr_p ddr_i;
2402 ddr_i = VEC_index (ddr_p, ddrs, i);
2403 found = false;
2405 for (j = 0; j < i; j++)
2407 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2409 if (vect_vfa_range_equal (ddr_i, ddr_j))
2411 if (vect_print_dump_info (REPORT_DR_DETAILS))
2413 fprintf (vect_dump, "found equal ranges ");
2414 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2415 fprintf (vect_dump, ", ");
2416 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2417 fprintf (vect_dump, " and ");
2418 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2419 fprintf (vect_dump, ", ");
2420 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2422 found = true;
2423 break;
2427 if (found)
2429 VEC_ordered_remove (ddr_p, ddrs, i);
2430 continue;
2432 i++;
2435 if (VEC_length (ddr_p, ddrs) >
2436 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2438 if (vect_print_dump_info (REPORT_DR_DETAILS))
2440 fprintf (vect_dump,
2441 "disable versioning for alias - max number of generated "
2442 "checks exceeded.");
2445 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2447 return false;
2450 return true;
2454 /* Function vect_analyze_data_refs.
2456 Find all the data references in the loop or basic block.
2458 The general structure of the analysis of data refs in the vectorizer is as
2459 follows:
2460 1- vect_analyze_data_refs(loop/bb): call
2461 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2462 in the loop/bb and their dependences.
2463 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2464 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2465 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
/* Exactly one of LOOP_VINFO/BB_VINFO selects loop or basic-block mode.
   *MIN_VF is raised to the largest TYPE_VECTOR_SUBPARTS found among the
   data-refs' vector types.  Returns false when vectorization must be
   abandoned entirely.  */
2469 bool
2470 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2471 bb_vec_info bb_vinfo,
2472 int *min_vf)
2474 struct loop *loop = NULL;
2475 basic_block bb = NULL;
2476 unsigned int i;
2477 VEC (data_reference_p, heap) *datarefs;
2478 struct data_reference *dr;
2479 tree scalar_type;
2480 bool res;
2482 if (vect_print_dump_info (REPORT_DETAILS))
2483 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
/* Step 1: collect the data-refs and their dependences, either for the
   loop or for the basic block.  */
2485 if (loop_vinfo)
2487 loop = LOOP_VINFO_LOOP (loop_vinfo);
2488 res = compute_data_dependences_for_loop
2489 (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
2490 &LOOP_VINFO_DDRS (loop_vinfo));
2492 if (!res)
2494 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2495 fprintf (vect_dump, "not vectorized: loop contains function calls"
2496 " or data references that cannot be analyzed");
2497 return false;
2500 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2502 else
2504 bb = BB_VINFO_BB (bb_vinfo);
2505 res = compute_data_dependences_for_bb (bb, true,
2506 &BB_VINFO_DATAREFS (bb_vinfo),
2507 &BB_VINFO_DDRS (bb_vinfo));
2508 if (!res)
2510 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2511 fprintf (vect_dump, "not vectorized: basic block contains function"
2512 " calls or data references that cannot be analyzed");
2513 return false;
2516 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2519 /* Go through the data-refs, check that the analysis succeeded. Update
2520 pointer from stmt_vec_info struct to DR and vectype. */
2522 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2524 gimple stmt;
2525 stmt_vec_info stmt_info;
2526 tree base, offset, init;
2527 int vf;
2529 if (!dr || !DR_REF (dr))
2531 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2532 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2533 return false;
2536 stmt = DR_STMT (dr);
2537 stmt_info = vinfo_for_stmt (stmt);
2539 /* Check that analysis of the data-ref succeeded. */
2540 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2541 || !DR_STEP (dr))
2543 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2545 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2546 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
/* Basic-block mode tolerates a bad data-ref by disabling just this
   statement; loop mode must give up entirely.  */
2549 if (bb_vinfo)
2551 /* Mark the statement as not vectorizable. */
2552 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2553 continue;
2555 else
2556 return false;
2559 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2561 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2562 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2563 "constant");
2564 if (bb_vinfo)
2566 /* Mark the statement as not vectorizable. */
2567 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2568 continue;
2570 else
2571 return false;
/* Work on unshared copies so later folding cannot corrupt the
   data-ref's own trees.  */
2574 base = unshare_expr (DR_BASE_ADDRESS (dr));
2575 offset = unshare_expr (DR_OFFSET (dr));
2576 init = unshare_expr (DR_INIT (dr));
2578 if (stmt_could_throw_p (stmt))
2580 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2582 fprintf (vect_dump, "not vectorized: statement can throw an "
2583 "exception ");
2584 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2586 return false;
2589 /* Update DR field in stmt_vec_info struct. */
2591 /* If the dataref is in an inner-loop of the loop that is considered for
2592 for vectorization, we also want to analyze the access relative to
2593 the outer-loop (DR contains information only relative to the
2594 inner-most enclosing loop). We do that by building a reference to the
2595 first location accessed by the inner-loop, and analyze it relative to
2596 the outer-loop. */
2597 if (loop && nested_in_vect_loop_p (loop, stmt))
2599 tree outer_step, outer_base, outer_init;
2600 HOST_WIDE_INT pbitsize, pbitpos;
2601 tree poffset;
2602 enum machine_mode pmode;
2603 int punsignedp, pvolatilep;
2604 affine_iv base_iv, offset_iv;
2605 tree dinit;
2607 /* Build a reference to the first location accessed by the
2608 inner-loop: *(BASE+INIT). (The first location is actually
2609 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2610 tree inner_base = build_fold_indirect_ref
2611 (fold_build2 (POINTER_PLUS_EXPR,
2612 TREE_TYPE (base), base,
2613 fold_convert (sizetype, init)));
2615 if (vect_print_dump_info (REPORT_DETAILS))
2617 fprintf (vect_dump, "analyze in outer-loop: ");
2618 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2621 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2622 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2623 gcc_assert (outer_base != NULL_TREE);
2625 if (pbitpos % BITS_PER_UNIT != 0)
2627 if (vect_print_dump_info (REPORT_DETAILS))
2628 fprintf (vect_dump, "failed: bit offset alignment.\n");
2629 return false;
/* Both the base address and the offset must evolve as simple affine
   induction variables with respect to the outer loop.  */
2632 outer_base = build_fold_addr_expr (outer_base);
2633 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2634 &base_iv, false))
2636 if (vect_print_dump_info (REPORT_DETAILS))
2637 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2638 return false;
2641 if (offset)
2643 if (poffset)
2644 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2645 poffset);
2646 else
2647 poffset = offset;
2650 if (!poffset)
2652 offset_iv.base = ssize_int (0);
2653 offset_iv.step = ssize_int (0);
2655 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2656 &offset_iv, false))
2658 if (vect_print_dump_info (REPORT_DETAILS))
2659 fprintf (vect_dump, "evolution of offset is not affine.\n");
2660 return false;
/* OUTER_INIT accumulates the byte bit-position plus the constant
   parts split off from both IV bases.  */
2663 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2664 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2665 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2666 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2667 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2669 outer_step = size_binop (PLUS_EXPR,
2670 fold_convert (ssizetype, base_iv.step),
2671 fold_convert (ssizetype, offset_iv.step));
2673 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2674 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2675 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2676 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2677 STMT_VINFO_DR_OFFSET (stmt_info) =
2678 fold_convert (ssizetype, offset_iv.base);
2679 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2680 size_int (highest_pow2_factor (offset_iv.base));
2682 if (vect_print_dump_info (REPORT_DETAILS))
2684 fprintf (vect_dump, "\touter base_address: ");
2685 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2686 fprintf (vect_dump, "\n\touter offset from base address: ");
2687 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2688 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2689 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2690 fprintf (vect_dump, "\n\touter step: ");
2691 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2692 fprintf (vect_dump, "\n\touter aligned to: ");
2693 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
/* Only a single data reference per statement is supported.  */
2697 if (STMT_VINFO_DATA_REF (stmt_info))
2699 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2701 fprintf (vect_dump,
2702 "not vectorized: more than one data ref in stmt: ");
2703 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2705 return false;
2708 STMT_VINFO_DATA_REF (stmt_info) = dr;
2710 /* Set vectype for STMT. */
2711 scalar_type = TREE_TYPE (DR_REF (dr));
2712 STMT_VINFO_VECTYPE (stmt_info) =
2713 get_vectype_for_scalar_type (scalar_type);
2714 if (!STMT_VINFO_VECTYPE (stmt_info))
2716 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2718 fprintf (vect_dump,
2719 "not vectorized: no vectype for stmt: ");
2720 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2721 fprintf (vect_dump, " scalar_type: ");
2722 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2725 if (bb_vinfo)
2727 /* Mark the statement as not vectorizable. */
2728 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2729 continue;
2731 else
2732 return false;
2735 /* Adjust the minimal vectorization factor according to the
2736 vector type. */
2737 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2738 if (vf > *min_vf)
2739 *min_vf = vf;
2742 return true;
2746 /* Function vect_get_new_vect_var.
2748 Returns a name for a new variable. The current naming scheme appends the
2749 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
2750 the name of vectorizer generated variables, and appends that to NAME if
2751 provided. */
2753 tree
2754 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2756 const char *prefix;
2757 tree new_vect_var;
2759 switch (var_kind)
2761 case vect_simple_var:
2762 prefix = "vect_";
2763 break;
2764 case vect_scalar_var:
2765 prefix = "stmp_";
2766 break;
2767 case vect_pointer_var:
2768 prefix = "vect_p";
2769 break;
2770 default:
2771 gcc_unreachable ();
2774 if (name)
2776 char* tmp = concat (prefix, name, NULL);
2777 new_vect_var = create_tmp_var (type, tmp);
2778 free (tmp);
2780 else
2781 new_vect_var = create_tmp_var (type, prefix);
2783 /* Mark vector typed variable as a gimple register variable. */
2784 if (TREE_CODE (type) == VECTOR_TYPE)
2785 DECL_GIMPLE_REG_P (new_vect_var) = true;
2787 return new_vect_var;
2791 /* Function vect_create_addr_base_for_vector_ref.
2793 Create an expression that computes the address of the first memory location
2794 that will be accessed for a data reference.
2796 Input:
2797 STMT: The statement containing the data reference.
2798 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2799 OFFSET: Optional. If supplied, it is be added to the initial address.
2800 LOOP: Specify relative to which loop-nest should the address be computed.
2801 For example, when the dataref is in an inner-loop nested in an
2802 outer-loop that is now being vectorized, LOOP can be either the
2803 outer-loop, or the inner-loop. The first memory location accessed
2804 by the following dataref ('in' points to short):
2806 for (i=0; i<N; i++)
2807 for (j=0; j<M; j++)
2808 s += in[i+j]
2810 is as follows:
2811 if LOOP=i_loop: &in (relative to i_loop)
2812 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2814 Output:
2815 1. Return an SSA_NAME whose value is the address of the memory location of
2816 the first vector of the data reference.
2817 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2818 these statement(s) which define the returned SSA_NAME.
2820 FORNOW: We are only handling array accesses with step 1. */
2822 tree
2823 vect_create_addr_base_for_vector_ref (gimple stmt,
2824 gimple_seq *new_stmt_list,
2825 tree offset,
2826 struct loop *loop)
2828 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2829 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2830 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2831 tree base_name;
2832 tree data_ref_base_var;
2833 tree vec_stmt;
2834 tree addr_base, addr_expr;
2835 tree dest;
2836 gimple_seq seq = NULL;
2837 tree base_offset = unshare_expr (DR_OFFSET (dr));
2838 tree init = unshare_expr (DR_INIT (dr));
2839 tree vect_ptr_type;
2840 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2841 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2842 tree base;
/* When LOOP is not the loop directly containing STMT we are vectorizing
   an outer loop of a nest: switch to the outer-loop analysis recorded in
   the stmt's vinfo (STMT_VINFO_DR_*).  */
2844 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2846 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2848 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2850 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2851 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2852 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2855 if (loop_vinfo)
2856 base_name = build_fold_indirect_ref (data_ref_base);
2857 else
/* Basic-block vectorization: use the reference itself and no extra
   offset or init displacement.  */
2859 base_offset = ssize_int (0);
2860 init = ssize_int (0);
2861 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
/* Gimplify the base address into a temporary, appending the produced
   statements to NEW_STMT_LIST.  */
2864 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2865 add_referenced_var (data_ref_base_var);
2866 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2867 data_ref_base_var);
2868 gimple_seq_add_seq (new_stmt_list, seq);
2870 /* Create base_offset */
2871 base_offset = size_binop (PLUS_EXPR,
2872 fold_convert (sizetype, base_offset),
2873 fold_convert (sizetype, init));
2874 dest = create_tmp_var (sizetype, "base_off");
2875 add_referenced_var (dest);
2876 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2877 gimple_seq_add_seq (new_stmt_list, seq);
/* The caller-supplied OFFSET is counted in elements; scale it by the
   scalar type size before adding it to the byte offset.  */
2879 if (offset)
2881 tree tmp = create_tmp_var (sizetype, "offset");
2883 add_referenced_var (tmp);
2884 offset = fold_build2 (MULT_EXPR, sizetype,
2885 fold_convert (sizetype, offset), step);
2886 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2887 base_offset, offset);
2888 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2889 gimple_seq_add_seq (new_stmt_list, seq);
2892 /* base + base_offset */
2893 if (loop_vinfo)
2894 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2895 data_ref_base, base_offset);
2896 else
2898 addr_base = build1 (ADDR_EXPR,
2899 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2900 unshare_expr (DR_REF (dr)));
/* Carry the qualifiers of the accessed MEM_REF's pointer over to the
   vector pointer type.  */
2903 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2904 base = get_base_address (DR_REF (dr));
2905 if (base
2906 && TREE_CODE (base) == MEM_REF)
2907 vect_ptr_type
2908 = build_qualified_type (vect_ptr_type,
2909 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2911 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2912 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2913 get_name (base_name));
2914 add_referenced_var (addr_expr);
2915 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2916 gimple_seq_add_seq (new_stmt_list, seq);
/* Preserve points-to information when the result ended up in an SSA
   name.  */
2918 if (DR_PTR_INFO (dr)
2919 && TREE_CODE (vec_stmt) == SSA_NAME)
2920 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2922 if (vect_print_dump_info (REPORT_DETAILS))
2924 fprintf (vect_dump, "created ");
2925 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2928 return vec_stmt;
2932 /* Function vect_create_data_ref_ptr.
2934 Create a new pointer to vector type (vp), that points to the first location
2935 accessed in the loop by STMT, along with the def-use update chain to
2936 appropriately advance the pointer through the loop iterations. Also set
2937 aliasing information for the pointer. This vector pointer is used by the
2938 callers to this function to create a memory reference expression for vector
2939 load/store access.
2941 Input:
2942 1. STMT: a stmt that references memory. Expected to be of the form
2943 GIMPLE_ASSIGN <name, data-ref> or
2944 GIMPLE_ASSIGN <data-ref, name>.
2945 2. AT_LOOP: the loop where the vector memref is to be created.
2946 3. OFFSET (optional): an offset to be added to the initial address accessed
2947 by the data-ref in STMT.
2948 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2949 pointing to the initial address.
2950 5. TYPE: if not NULL indicates the required type of the data-ref.
2952 Output:
2953 1. Declare a new ptr to vector_type, and have it point to the base of the
2954 data reference (initial addressed accessed by the data reference).
2955 For example, for vector of type V8HI, the following code is generated:
2957 v8hi *vp;
2958 vp = (v8hi *)initial_address;
2960 if OFFSET is not supplied:
2961 initial_address = &a[init];
2962 if OFFSET is supplied:
2963 initial_address = &a[init + OFFSET];
2965 Return the initial_address in INITIAL_ADDRESS.
2967 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2968 update the pointer in each iteration of the loop.
2970 Return the increment stmt that updates the pointer in PTR_INCR.
2972 3. Set INV_P to true if the access pattern of the data reference in the
2973 vectorized loop is invariant. Set it to false otherwise.
2975 4. Return the pointer. */
/* See the description continued above: create a vector pointer for the
   data reference of STMT, compute its initial value, and unless
   ONLY_INIT also build the per-iteration update stmt (returned through
   PTR_INCR).  */
2977 tree
2978 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2979 tree offset, tree *initial_address, gimple *ptr_incr,
2980 bool only_init, bool *inv_p)
2982 tree base_name;
2983 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2984 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2985 struct loop *loop = NULL;
2986 bool nested_in_vect_loop = false;
2987 struct loop *containing_loop = NULL;
2988 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2989 tree vect_ptr_type;
2990 tree vect_ptr;
2991 tree new_temp;
2992 gimple vec_stmt;
2993 gimple_seq new_stmt_list = NULL;
2994 edge pe = NULL;
2995 basic_block new_bb;
2996 tree vect_ptr_init;
2997 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2998 tree vptr;
2999 gimple_stmt_iterator incr_gsi;
3000 bool insert_after;
3001 bool negative;
3002 tree indx_before_incr, indx_after_incr;
3003 gimple incr;
3004 tree step;
3005 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3006 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3007 tree base;
/* Loop vectorization: pick up the loop, nesting status and preheader
   edge.  Otherwise this is basic-block SLP: only the initial pointer
   is needed, so force ONLY_INIT and clear the caller's increment
   slot.  */
3009 if (loop_vinfo)
3011 loop = LOOP_VINFO_LOOP (loop_vinfo);
3012 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3013 containing_loop = (gimple_bb (stmt))->loop_father;
3014 pe = loop_preheader_edge (loop);
3016 else
3018 gcc_assert (bb_vinfo);
3019 only_init = true;
3020 *ptr_incr = NULL;
3023 /* Check the step (evolution) of the load in LOOP, and record
3024 whether it's invariant. */
3025 if (nested_in_vect_loop)
3026 step = STMT_VINFO_DR_STEP (stmt_info);
3027 else
3028 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3030 if (tree_int_cst_compare (step, size_zero_node) == 0)
3031 *inv_p = true;
3032 else
3033 *inv_p = false;
/* A negative step means the access walks backwards through memory.  */
3034 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3036 /* Create an expression for the first address accessed by this load
3037 in LOOP. */
3038 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3040 if (vect_print_dump_info (REPORT_DETAILS))
3042 tree data_ref_base = base_name;
3043 fprintf (vect_dump, "create vector-pointer variable to type: ");
3044 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3045 if (TREE_CODE (data_ref_base) == VAR_DECL
3046 || TREE_CODE (data_ref_base) == ARRAY_REF)
3047 fprintf (vect_dump, " vectorizing an array ref: ");
3048 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3049 fprintf (vect_dump, " vectorizing a record based array ref: ");
3050 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3051 fprintf (vect_dump, " vectorizing a pointer ref: ");
3052 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3055 /* (1) Create the new vector-pointer variable. */
3056 vect_ptr_type = build_pointer_type (vectype);
/* Keep any qualifiers of the original MEM_REF base pointer on the new
   pointer type.  */
3057 base = get_base_address (DR_REF (dr));
3058 if (base
3059 && TREE_CODE (base) == MEM_REF)
3060 vect_ptr_type
3061 = build_qualified_type (vect_ptr_type,
3062 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3063 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3064 get_name (base_name));
3066 /* Vector types inherit the alias set of their component type by default so
3067 we need to use a ref-all pointer if the data reference does not conflict
3068 with the created vector data reference because it is not addressable. */
3069 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3070 get_alias_set (DR_REF (dr))))
3072 vect_ptr_type
3073 = build_pointer_type_for_mode (vectype,
3074 TYPE_MODE (vect_ptr_type), true);
3075 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3076 get_name (base_name));
3079 /* Likewise for any of the data references in the stmt group. */
3080 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
3082 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
/* NOTE(review): this is a do ... while (orig_stmt) walk over the whole
   DR group (closed at line 3100 below); it stops early once one member
   forces a ref-all pointer.  */
3085 tree lhs = gimple_assign_lhs (orig_stmt);
3086 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3087 get_alias_set (lhs)))
3089 vect_ptr_type
3090 = build_pointer_type_for_mode (vectype,
3091 TYPE_MODE (vect_ptr_type), true);
3092 vect_ptr
3093 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3094 get_name (base_name));
3095 break;
3098 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
3100 while (orig_stmt);
3103 add_referenced_var (vect_ptr);
3105 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3106 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3107 def-use update cycles for the pointer: one relative to the outer-loop
3108 (LOOP), which is what steps (3) and (4) below do. The other is relative
3109 to the inner-loop (which is the inner-most loop containing the dataref),
3110 and this is done be step (5) below.
3112 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3113 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3114 redundant. Steps (3),(4) create the following:
3116 vp0 = &base_addr;
3117 LOOP: vp1 = phi(vp0,vp2)
3120 vp2 = vp1 + step
3121 goto LOOP
3123 If there is an inner-loop nested in loop, then step (5) will also be
3124 applied, and an additional update in the inner-loop will be created:
3126 vp0 = &base_addr;
3127 LOOP: vp1 = phi(vp0,vp2)
3129 inner: vp3 = phi(vp1,vp4)
3130 vp4 = vp3 + inner_step
3131 if () goto inner
3133 vp2 = vp1 + step
3134 if () goto LOOP */
3136 /* (2) Calculate the initial address the vector-pointer, and set
3137 the vector-pointer to point to it before the loop. */
3139 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3141 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3142 offset, loop);
/* The address computation goes on the preheader edge when we have a
   loop, otherwise (basic-block SLP) directly before STMT.  */
3143 if (new_stmt_list)
3145 if (pe)
3147 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3148 gcc_assert (!new_bb);
3150 else
3151 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
3154 *initial_address = new_temp;
3156 /* Create: p = (vectype *) initial_base */
3157 if (TREE_CODE (new_temp) != SSA_NAME
3158 || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
3160 vec_stmt = gimple_build_assign (vect_ptr,
3161 fold_convert (vect_ptr_type, new_temp));
3162 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
3163 /* Copy the points-to information if it exists. */
3164 if (DR_PTR_INFO (dr))
3165 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
3166 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
3167 if (pe)
3169 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3170 gcc_assert (!new_bb);
3172 else
3173 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
3175 else
3176 vect_ptr_init = new_temp;
3178 /* (3) Handle the updating of the vector-pointer inside the loop.
3179 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3180 inner-loop nested in LOOP (during outer-loop vectorization). */
3182 /* No update in loop is required. */
3183 if (only_init && (!loop_vinfo || at_loop == loop))
3184 vptr = vect_ptr_init;
3185 else
3187 /* The step of the vector pointer is the Vector Size. */
3188 tree step = TYPE_SIZE_UNIT (vectype);
3189 /* One exception to the above is when the scalar step of the load in
3190 LOOP is zero. In this case the step here is also zero. */
3191 if (*inv_p)
3192 step = size_zero_node;
3193 else if (negative)
3194 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3196 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3198 create_iv (vect_ptr_init,
3199 fold_convert (vect_ptr_type, step),
3200 vect_ptr, loop, &incr_gsi, insert_after,
3201 &indx_before_incr, &indx_after_incr);
3202 incr = gsi_stmt (incr_gsi);
3203 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3205 /* Copy the points-to information if it exists. */
3206 if (DR_PTR_INFO (dr))
3208 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3209 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3211 if (ptr_incr)
3212 *ptr_incr = incr;
3214 vptr = indx_before_incr;
/* Done unless an inner-loop update cycle (step (5) in the comment
   above) is also required, i.e. outer-loop vectorization.  */
3217 if (!nested_in_vect_loop || only_init)
3218 return vptr;
3221 /* (4) Handle the updating of the vector-pointer inside the inner-loop
3222 nested in LOOP, if exists. */
3224 gcc_assert (nested_in_vect_loop);
3225 if (!only_init)
3227 standard_iv_increment_position (containing_loop, &incr_gsi,
3228 &insert_after);
3229 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
3230 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3231 &indx_after_incr);
3232 incr = gsi_stmt (incr_gsi);
3233 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3235 /* Copy the points-to information if it exists. */
3236 if (DR_PTR_INFO (dr))
3238 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3239 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3241 if (ptr_incr)
3242 *ptr_incr = incr;
3244 return indx_before_incr;
3246 else
3247 gcc_unreachable ();
3251 /* Function bump_vector_ptr
3253 Increment a pointer (to a vector type) by vector-size. If requested,
3254 i.e. if PTR-INCR is given, then also connect the new increment stmt
3255 to the existing def-use update-chain of the pointer, by modifying
3256 the PTR_INCR as illustrated below:
3258 The pointer def-use update-chain before this function:
3259 DATAREF_PTR = phi (p_0, p_2)
3260 ....
3261 PTR_INCR: p_2 = DATAREF_PTR + step
3263 The pointer def-use update-chain after this function:
3264 DATAREF_PTR = phi (p_0, p_2)
3265 ....
3266 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3267 ....
3268 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3270 Input:
3271 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3272 in the loop.
3273 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3274 the loop. The increment amount across iterations is expected
3275 to be vector_size.
3276 BSI - location where the new update stmt is to be placed.
3277 STMT - the original scalar memory-access stmt that is being vectorized.
3278 BUMP - optional. The offset by which to bump the pointer. If not given,
3279 the offset is assumed to be vector_size.
3281 Output: Return NEW_DATAREF_PTR as illustrated above.
3285 tree
3286 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3287 gimple stmt, tree bump)
3289 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3290 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3291 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3292 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3293 tree update = TYPE_SIZE_UNIT (vectype);
3294 gimple incr_stmt;
3295 ssa_op_iter iter;
3296 use_operand_p use_p;
3297 tree new_dataref_ptr;
3299 if (bump)
3300 update = bump;
3302 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3303 dataref_ptr, update);
3304 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3305 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3306 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3308 /* Copy the points-to information if it exists. */
3309 if (DR_PTR_INFO (dr))
3310 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3312 if (!ptr_incr)
3313 return new_dataref_ptr;
3315 /* Update the vector-pointer's cross-iteration increment. */
3316 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3318 tree use = USE_FROM_PTR (use_p);
3320 if (use == dataref_ptr)
3321 SET_USE (use_p, new_dataref_ptr);
3322 else
3323 gcc_assert (tree_int_cst_compare (use, update) == 0);
3326 return new_dataref_ptr;
3330 /* Function vect_create_destination_var.
3332 Create a new temporary of type VECTYPE. */
3334 tree
3335 vect_create_destination_var (tree scalar_dest, tree vectype)
3337 tree vec_dest;
3338 const char *new_name;
3339 tree type;
3340 enum vect_var_kind kind;
3342 kind = vectype ? vect_simple_var : vect_scalar_var;
3343 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3345 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3347 new_name = get_name (scalar_dest);
3348 if (!new_name)
3349 new_name = "var_";
3350 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3351 add_referenced_var (vec_dest);
3353 return vec_dest;
3356 /* Function vect_strided_store_supported.
3358 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3359 and FALSE otherwise. */
3361 bool
3362 vect_strided_store_supported (tree vectype)
3364 optab interleave_high_optab, interleave_low_optab;
3365 enum machine_mode mode;
3367 mode = TYPE_MODE (vectype);
3369 /* Check that the operation is supported. */
3370 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3371 vectype, optab_default);
3372 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3373 vectype, optab_default);
3374 if (!interleave_high_optab || !interleave_low_optab)
3376 if (vect_print_dump_info (REPORT_DETAILS))
3377 fprintf (vect_dump, "no optab for interleave.");
3378 return false;
3381 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3382 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3384 if (vect_print_dump_info (REPORT_DETAILS))
3385 fprintf (vect_dump, "interleave op not supported by target.");
3386 return false;
3389 return true;
3393 /* Function vect_permute_store_chain.
3395 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3396 a power of 2, generate interleave_high/low stmts to reorder the data
3397 correctly for the stores. Return the final references for stores in
3398 RESULT_CHAIN.
3400 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3401 The input is 4 vectors each containing 8 elements. We assign a number to
3402 each element, the input sequence is:
3404 1st vec: 0 1 2 3 4 5 6 7
3405 2nd vec: 8 9 10 11 12 13 14 15
3406 3rd vec: 16 17 18 19 20 21 22 23
3407 4th vec: 24 25 26 27 28 29 30 31
3409 The output sequence should be:
3411 1st vec: 0 8 16 24 1 9 17 25
3412 2nd vec: 2 10 18 26 3 11 19 27
3413 3rd vec: 4 12 20 28 5 13 21 30
3414 4th vec: 6 14 22 30 7 15 23 31
3416 i.e., we interleave the contents of the four vectors in their order.
3418 We use interleave_high/low instructions to create such output. The input of
3419 each interleave_high/low operation is two vectors:
3420 1st vec 2nd vec
3421 0 1 2 3 4 5 6 7
3422 the even elements of the result vector are obtained left-to-right from the
3423 high/low elements of the first vector. The odd elements of the result are
3424 obtained left-to-right from the high/low elements of the second vector.
3425 The output of interleave_high will be: 0 4 1 5
3426 and of interleave_low: 2 6 3 7
3429 The permutation is done in log LENGTH stages. In each stage interleave_high
3430 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3431 where the first argument is taken from the first half of DR_CHAIN and the
3432 second argument from it's second half.
3433 In our example,
3435 I1: interleave_high (1st vec, 3rd vec)
3436 I2: interleave_low (1st vec, 3rd vec)
3437 I3: interleave_high (2nd vec, 4th vec)
3438 I4: interleave_low (2nd vec, 4th vec)
3440 The output for the first stage is:
3442 I1: 0 16 1 17 2 18 3 19
3443 I2: 4 20 5 21 6 22 7 23
3444 I3: 8 24 9 25 10 26 11 27
3445 I4: 12 28 13 29 14 30 15 31
3447 The output of the second stage, i.e. the final result is:
3449 I1: 0 8 16 24 1 9 17 25
3450 I2: 2 10 18 26 3 11 19 27
3451 I3: 4 12 20 28 5 13 21 30
3452 I4: 6 14 22 30 7 15 23 31. */
3454 bool
3455 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3456 unsigned int length,
3457 gimple stmt,
3458 gimple_stmt_iterator *gsi,
3459 VEC(tree,heap) **result_chain)
3461 tree perm_dest, vect1, vect2, high, low;
3462 gimple perm_stmt;
3463 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3464 int i;
3465 unsigned int j;
3466 enum tree_code high_code, low_code;
3468 /* Check that the operation is supported. */
3469 if (!vect_strided_store_supported (vectype))
3470 return false;
3472 *result_chain = VEC_copy (tree, heap, dr_chain);
3474 for (i = 0; i < exact_log2 (length); i++)
3476 for (j = 0; j < length/2; j++)
3478 vect1 = VEC_index (tree, dr_chain, j);
3479 vect2 = VEC_index (tree, dr_chain, j+length/2);
3481 /* Create interleaving stmt:
3482 in the case of big endian:
3483 high = interleave_high (vect1, vect2)
3484 and in the case of little endian:
3485 high = interleave_low (vect1, vect2). */
3486 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3487 DECL_GIMPLE_REG_P (perm_dest) = 1;
3488 add_referenced_var (perm_dest);
3489 if (BYTES_BIG_ENDIAN)
3491 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3492 low_code = VEC_INTERLEAVE_LOW_EXPR;
3494 else
3496 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3497 high_code = VEC_INTERLEAVE_LOW_EXPR;
3499 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3500 vect1, vect2);
3501 high = make_ssa_name (perm_dest, perm_stmt);
3502 gimple_assign_set_lhs (perm_stmt, high);
3503 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3504 VEC_replace (tree, *result_chain, 2*j, high);
3506 /* Create interleaving stmt:
3507 in the case of big endian:
3508 low = interleave_low (vect1, vect2)
3509 and in the case of little endian:
3510 low = interleave_high (vect1, vect2). */
3511 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3512 DECL_GIMPLE_REG_P (perm_dest) = 1;
3513 add_referenced_var (perm_dest);
3514 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3515 vect1, vect2);
3516 low = make_ssa_name (perm_dest, perm_stmt);
3517 gimple_assign_set_lhs (perm_stmt, low);
3518 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3519 VEC_replace (tree, *result_chain, 2*j+1, low);
3521 dr_chain = VEC_copy (tree, heap, *result_chain);
3523 return true;
3526 /* Function vect_setup_realignment
3528 This function is called when vectorizing an unaligned load using
3529 the dr_explicit_realign[_optimized] scheme.
3530 This function generates the following code at the loop prolog:
3532 p = initial_addr;
3533 x msq_init = *(floor(p)); # prolog load
3534 realignment_token = call target_builtin;
3535 loop:
3536 x msq = phi (msq_init, ---)
3538 The stmts marked with x are generated only for the case of
3539 dr_explicit_realign_optimized.
3541 The code above sets up a new (vector) pointer, pointing to the first
3542 location accessed by STMT, and a "floor-aligned" load using that pointer.
3543 It also generates code to compute the "realignment-token" (if the relevant
3544 target hook was defined), and creates a phi-node at the loop-header bb
3545 whose arguments are the result of the prolog-load (created by this
3546 function) and the result of a load that takes place in the loop (to be
3547 created by the caller to this function).
3549 For the case of dr_explicit_realign_optimized:
3550 The caller to this function uses the phi-result (msq) to create the
3551 realignment code inside the loop, and sets up the missing phi argument,
3552 as follows:
3553 loop:
3554 msq = phi (msq_init, lsq)
3555 lsq = *(floor(p')); # load in loop
3556 result = realign_load (msq, lsq, realignment_token);
3558 For the case of dr_explicit_realign:
3559 loop:
3560 msq = *(floor(p)); # load in loop
3561 p' = p + (VS-1);
3562 lsq = *(floor(p')); # load in loop
3563 result = realign_load (msq, lsq, realignment_token);
3565 Input:
3566 STMT - (scalar) load stmt to be vectorized. This load accesses
3567 a memory location that may be unaligned.
3568 BSI - place where new code is to be inserted.
3569 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3570 is used.
3572 Output:
3573 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3574 target hook, if defined.
3575 Return value - the result of the loop-header phi node. */
/* See the description above: emit the realignment prolog for STMT's
   unaligned load (initial pointer, optional prolog load, realignment
   token, and for the optimized scheme the loop-header phi).  */
3577 tree
3578 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3579 tree *realignment_token,
3580 enum dr_alignment_support alignment_support_scheme,
3581 tree init_addr,
3582 struct loop **at_loop)
3584 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3585 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3586 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3587 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3588 struct loop *loop = NULL;
3589 edge pe = NULL;
3590 tree scalar_dest = gimple_assign_lhs (stmt);
3591 tree vec_dest;
3592 gimple inc;
3593 tree ptr;
3594 tree data_ref;
3595 gimple new_stmt;
3596 basic_block new_bb;
3597 tree msq_init = NULL_TREE;
3598 tree new_temp;
3599 gimple phi_stmt;
3600 tree msq = NULL_TREE;
3601 gimple_seq stmts = NULL;
3602 bool inv_p;
3603 bool compute_in_loop = false;
3604 bool nested_in_vect_loop = false;
3605 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3606 struct loop *loop_for_initial_load = NULL;
3608 if (loop_vinfo)
3610 loop = LOOP_VINFO_LOOP (loop_vinfo);
3611 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3614 gcc_assert (alignment_support_scheme == dr_explicit_realign
3615 || alignment_support_scheme == dr_explicit_realign_optimized);
3617 /* We need to generate three things:
3618 1. the misalignment computation
3619 2. the extra vector load (for the optimized realignment scheme).
3620 3. the phi node for the two vectors from which the realignment is
3621 done (for the optimized realignment scheme). */
3623 /* 1. Determine where to generate the misalignment computation.
3625 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3626 calculation will be generated by this function, outside the loop (in the
3627 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3628 caller, inside the loop.
3630 Background: If the misalignment remains fixed throughout the iterations of
3631 the loop, then both realignment schemes are applicable, and also the
3632 misalignment computation can be done outside LOOP. This is because we are
3633 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3634 are a multiple of VS (the Vector Size), and therefore the misalignment in
3635 different vectorized LOOP iterations is always the same.
3636 The problem arises only if the memory access is in an inner-loop nested
3637 inside LOOP, which is now being vectorized using outer-loop vectorization.
3638 This is the only case when the misalignment of the memory access may not
3639 remain fixed throughout the iterations of the inner-loop (as explained in
3640 detail in vect_supportable_dr_alignment). In this case, not only is the
3641 optimized realignment scheme not applicable, but also the misalignment
3642 computation (and generation of the realignment token that is passed to
3643 REALIGN_LOAD) have to be done inside the loop.
3645 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3646 or not, which in turn determines if the misalignment is computed inside
3647 the inner-loop, or outside LOOP. */
3649 if (init_addr != NULL_TREE || !loop_vinfo)
3651 compute_in_loop = true;
3652 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3656 /* 2. Determine where to generate the extra vector load.
3658 For the optimized realignment scheme, instead of generating two vector
3659 loads in each iteration, we generate a single extra vector load in the
3660 preheader of the loop, and in each iteration reuse the result of the
3661 vector load from the previous iteration. In case the memory access is in
3662 an inner-loop nested inside LOOP, which is now being vectorized using
3663 outer-loop vectorization, we need to determine whether this initial vector
3664 load should be generated at the preheader of the inner-loop, or can be
3665 generated at the preheader of LOOP. If the memory access has no evolution
3666 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3667 to be generated inside LOOP (in the preheader of the inner-loop). */
3669 if (nested_in_vect_loop)
3671 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3672 bool invariant_in_outerloop =
3673 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3674 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3676 else
3677 loop_for_initial_load = loop;
/* Report back to the caller where the initial load was placed.  */
3678 if (at_loop)
3679 *at_loop = loop_for_initial_load;
3681 if (loop_for_initial_load)
3682 pe = loop_preheader_edge (loop_for_initial_load);
3684 /* 3. For the case of the optimized realignment, create the first vector
3685 load at the loop preheader. */
3687 if (alignment_support_scheme == dr_explicit_realign_optimized)
3689 /* Create msq_init = *(floor(p1)) in the loop preheader */
3691 gcc_assert (!compute_in_loop);
3692 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3693 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3694 &init_addr, &inc, true, &inv_p);
/* Floor the pointer to a vector-alignment boundary by masking off the
   low bits.  */
3695 new_stmt = gimple_build_assign_with_ops
3696 (BIT_AND_EXPR, NULL_TREE, ptr,
3697 build_int_cst (TREE_TYPE (ptr),
3698 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3699 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3700 gimple_assign_set_lhs (new_stmt, new_temp);
3701 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3702 gcc_assert (!new_bb);
3703 data_ref
3704 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3705 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3706 new_stmt = gimple_build_assign (vec_dest, data_ref);
3707 new_temp = make_ssa_name (vec_dest, new_stmt);
3708 gimple_assign_set_lhs (new_stmt, new_temp);
3709 mark_symbols_for_renaming (new_stmt);
3710 if (pe)
3712 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3713 gcc_assert (!new_bb);
3715 else
3716 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3718 msq_init = gimple_assign_lhs (new_stmt);
3721 /* 4. Create realignment token using a target builtin, if available.
3722 It is done either inside the containing loop, or before LOOP (as
3723 determined above). */
3725 if (targetm.vectorize.builtin_mask_for_load)
3727 tree builtin_decl;
3729 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
3730 if (!init_addr)
3732 /* Generate the INIT_ADDR computation outside LOOP. */
3733 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3734 NULL_TREE, loop);
3735 if (loop)
3737 pe = loop_preheader_edge (loop);
3738 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3739 gcc_assert (!new_bb);
3741 else
3742 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3745 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3746 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3747 vec_dest =
3748 vect_create_destination_var (scalar_dest,
3749 gimple_call_return_type (new_stmt));
3750 new_temp = make_ssa_name (vec_dest, new_stmt);
3751 gimple_call_set_lhs (new_stmt, new_temp);
3753 if (compute_in_loop)
3754 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3755 else
3757 /* Generate the misalignment computation outside LOOP. */
3758 pe = loop_preheader_edge (loop);
3759 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3760 gcc_assert (!new_bb);
3763 *realignment_token = gimple_call_lhs (new_stmt);
3765 /* The result of the CALL_EXPR to this builtin is determined from
3766 the value of the parameter and no global variables are touched
3767 which makes the builtin a "const" function. Requiring the
3768 builtin to have the "const" attribute makes it unnecessary
3769 to call mark_call_clobbered. */
3770 gcc_assert (TREE_READONLY (builtin_decl));
/* For dr_explicit_realign the caller emits the realigning loads in the
   loop itself; note MSQ is still NULL_TREE at this return.  */
3773 if (alignment_support_scheme == dr_explicit_realign)
3774 return msq;
3776 gcc_assert (!compute_in_loop);
3777 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3780 /* 5. Create msq = phi <msq_init, lsq> in loop */
3782 pe = loop_preheader_edge (containing_loop);
3783 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3784 msq = make_ssa_name (vec_dest, NULL);
3785 phi_stmt = create_phi_node (msq, containing_loop->header);
3786 SSA_NAME_DEF_STMT (msq) = phi_stmt;
/* The LSQ argument on the latch edge is filled in later by the
   caller (see the function comment).  */
3787 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3789 return msq;
3793 /* Function vect_strided_load_supported.
3795 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3796 and FALSE otherwise. */
3798 bool
3799 vect_strided_load_supported (tree vectype)
3801 optab perm_even_optab, perm_odd_optab;
3802 enum machine_mode mode;
3804 mode = TYPE_MODE (vectype);
3806 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3807 optab_default);
3808 if (!perm_even_optab)
3810 if (vect_print_dump_info (REPORT_DETAILS))
3811 fprintf (vect_dump, "no optab for perm_even.");
3812 return false;
3815 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3817 if (vect_print_dump_info (REPORT_DETAILS))
3818 fprintf (vect_dump, "perm_even op not supported by target.");
3819 return false;
3822 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3823 optab_default);
3824 if (!perm_odd_optab)
3826 if (vect_print_dump_info (REPORT_DETAILS))
3827 fprintf (vect_dump, "no optab for perm_odd.");
3828 return false;
3831 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3833 if (vect_print_dump_info (REPORT_DETAILS))
3834 fprintf (vect_dump, "perm_odd op not supported by target.");
3835 return false;
3837 return true;
3841 /* Function vect_permute_load_chain.
3843 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3844 a power of 2, generate extract_even/odd stmts to reorder the input data
3845 correctly. Return the final references for loads in RESULT_CHAIN.
3847 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3848 The input is 4 vectors each containing 8 elements. We assign a number to each
3849 element, the input sequence is:
3851 1st vec: 0 1 2 3 4 5 6 7
3852 2nd vec: 8 9 10 11 12 13 14 15
3853 3rd vec: 16 17 18 19 20 21 22 23
3854 4th vec: 24 25 26 27 28 29 30 31
3856 The output sequence should be:
3858 1st vec: 0 4 8 12 16 20 24 28
3859 2nd vec: 1 5 9 13 17 21 25 29
3860 3rd vec: 2 6 10 14 18 22 26 30
3861 4th vec: 3 7 11 15 19 23 27 31
3863 i.e., the first output vector should contain the first elements of each
3864 interleaving group, etc.
3866 We use extract_even/odd instructions to create such output. The input of
3867 each extract_even/odd operation is two vectors
3868 1st vec 2nd vec
3869 0 1 2 3 4 5 6 7
3871 and the output is the vector of extracted even/odd elements. The output of
3872 extract_even will be: 0 2 4 6
3873 and of extract_odd: 1 3 5 7
3876 The permutation is done in log LENGTH stages. In each stage extract_even
3877 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3878 their order. In our example,
3880 E1: extract_even (1st vec, 2nd vec)
3881 E2: extract_odd (1st vec, 2nd vec)
3882 E3: extract_even (3rd vec, 4th vec)
3883 E4: extract_odd (3rd vec, 4th vec)
3885 The output for the first stage will be:
3887 E1: 0 2 4 6 8 10 12 14
3888 E2: 1 3 5 7 9 11 13 15
3889 E3: 16 18 20 22 24 26 28 30
3890 E4: 17 19 21 23 25 27 29 31
3892 In order to proceed and create the correct sequence for the next stage (or
3893 for the correct output, if the second stage is the last one, as in our
3894 example), we first put the output of extract_even operation and then the
3895 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3896 The input for the second stage is:
3898 1st vec (E1): 0 2 4 6 8 10 12 14
3899 2nd vec (E3): 16 18 20 22 24 26 28 30
3900 3rd vec (E2): 1 3 5 7 9 11 13 15
3901 4th vec (E4): 17 19 21 23 25 27 29 31
3903 The output of the second stage:
3905 E1: 0 4 8 12 16 20 24 28
3906 E2: 2 6 10 14 18 22 26 30
3907 E3: 1 5 9 13 17 21 25 29
3908 E4: 3 7 11 15 19 23 27 31
3910 And RESULT_CHAIN after reordering:
3912 1st vec (E1): 0 4 8 12 16 20 24 28
3913 2nd vec (E3): 1 5 9 13 17 21 25 29
3914 3rd vec (E2): 2 6 10 14 18 22 26 30
3915 4th vec (E4): 3 7 11 15 19 23 27 31. */
/* See the description above: permute the LENGTH (a power of two)
   vectors of DR_CHAIN with extract_even/extract_odd stmts, returning
   the permuted references in RESULT_CHAIN.  */
3917 bool
3918 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3919 unsigned int length,
3920 gimple stmt,
3921 gimple_stmt_iterator *gsi,
3922 VEC(tree,heap) **result_chain)
3924 tree perm_dest, data_ref, first_vect, second_vect;
3925 gimple perm_stmt;
3926 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3927 int i;
3928 unsigned int j;
3930 /* Check that the operation is supported. */
3931 if (!vect_strided_load_supported (vectype))
3932 return false;
3934 *result_chain = VEC_copy (tree, heap, dr_chain);
3935 for (i = 0; i < exact_log2 (length); i++)
3937 for (j = 0; j < length; j +=2)
3939 first_vect = VEC_index (tree, dr_chain, j)
— wait
3939 first_vect = VEC_index (tree, dr_chain, j);
3940 second_vect = VEC_index (tree, dr_chain, j+1);
3942 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3943 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3944 DECL_GIMPLE_REG_P (perm_dest) = 1;
3945 add_referenced_var (perm_dest);
3947 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3948 perm_dest, first_vect,
3949 second_vect);
3951 data_ref = make_ssa_name (perm_dest, perm_stmt);
3952 gimple_assign_set_lhs (perm_stmt, data_ref);
3953 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3954 mark_symbols_for_renaming (perm_stmt);
/* Even results fill the first half of the chain, odd results the
   second half (see the stage description above).  */
3956 VEC_replace (tree, *result_chain, j/2, data_ref);
3958 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3959 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3960 DECL_GIMPLE_REG_P (perm_dest) = 1;
3961 add_referenced_var (perm_dest);
3963 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3964 perm_dest, first_vect,
3965 second_vect);
3966 data_ref = make_ssa_name (perm_dest, perm_stmt);
3967 gimple_assign_set_lhs (perm_stmt, data_ref);
3968 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3969 mark_symbols_for_renaming (perm_stmt);
3971 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
/* The output of this stage is the input of the next one.  */
3973 dr_chain = VEC_copy (tree, heap, *result_chain);
3975 return true;
3979 /* Function vect_transform_strided_load.
3981 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3982 to perform their permutation and ascribe the result vectorized statements to
3983 the scalar statements.
3986 bool
3987 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3988 gimple_stmt_iterator *gsi)
3990 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3991 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3992 gimple next_stmt, new_stmt;
3993 VEC(tree,heap) *result_chain = NULL;
3994 unsigned int i, gap_count;
3995 tree tmp_data_ref;
3997 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3998 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3999 vectors, that are ready for vector computation. */
4000 result_chain = VEC_alloc (tree, heap, size);
4001 /* Permute. */
4002 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
4003 return false;
4005 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4006 Since we scan the chain starting from it's first node, their order
4007 corresponds the order of data-refs in RESULT_CHAIN. */
4008 next_stmt = first_stmt;
4009 gap_count = 1;
4010 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4012 if (!next_stmt)
4013 break;
4015 /* Skip the gaps. Loads created for the gaps will be removed by dead
4016 code elimination pass later. No need to check for the first stmt in
4017 the group, since it always exists.
4018 DR_GROUP_GAP is the number of steps in elements from the previous
4019 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4020 correspond to the gaps. */
4021 if (next_stmt != first_stmt
4022 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4024 gap_count++;
4025 continue;
4028 while (next_stmt)
4030 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4031 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4032 copies, and we put the new vector statement in the first available
4033 RELATED_STMT. */
4034 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4035 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4036 else
4038 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4040 gimple prev_stmt =
4041 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4042 gimple rel_stmt =
4043 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4044 while (rel_stmt)
4046 prev_stmt = rel_stmt;
4047 rel_stmt =
4048 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4051 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4052 new_stmt;
4056 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4057 gap_count = 1;
4058 /* If NEXT_STMT accesses the same DR as the previous statement,
4059 put the same TMP_DATA_REF as its vectorized statement; otherwise
4060 get the next data-ref from RESULT_CHAIN. */
4061 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4062 break;
4066 VEC_free (tree, heap, result_chain);
4067 return true;
4070 /* Function vect_force_dr_alignment_p.
4072 Returns whether the alignment of a DECL can be forced to be aligned
4073 on ALIGNMENT bit boundary. */
4075 bool
4076 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4078 if (TREE_CODE (decl) != VAR_DECL)
4079 return false;
4081 if (DECL_EXTERNAL (decl))
4082 return false;
4084 if (TREE_ASM_WRITTEN (decl))
4085 return false;
4087 if (TREE_STATIC (decl))
4088 return (alignment <= MAX_OFILE_ALIGNMENT);
4089 else
4090 return (alignment <= MAX_STACK_ALIGNMENT);
4094 /* Return whether the data reference DR is supported with respect to its
4095 alignment.
4096 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4097 it is aligned, i.e., check if it is possible to vectorize it with different
4098 alignment. */
4100 enum dr_alignment_support
4101 vect_supportable_dr_alignment (struct data_reference *dr,
4102 bool check_aligned_accesses)
4104 gimple stmt = DR_STMT (dr);
4105 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4106 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4107 enum machine_mode mode = TYPE_MODE (vectype);
4108 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4109 struct loop *vect_loop = NULL;
4110 bool nested_in_vect_loop = false;
4112 if (aligned_access_p (dr) && !check_aligned_accesses)
4113 return dr_aligned;
4115 if (loop_vinfo)
4117 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4118 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4121 /* Possibly unaligned access. */
4123 /* We can choose between using the implicit realignment scheme (generating
4124 a misaligned_move stmt) and the explicit realignment scheme (generating
4125 aligned loads with a REALIGN_LOAD). There are two variants to the
4126 explicit realignment scheme: optimized, and unoptimized.
4127 We can optimize the realignment only if the step between consecutive
4128 vector loads is equal to the vector size. Since the vector memory
4129 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4130 is guaranteed that the misalignment amount remains the same throughout the
4131 execution of the vectorized loop. Therefore, we can create the
4132 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4133 at the loop preheader.
4135 However, in the case of outer-loop vectorization, when vectorizing a
4136 memory access in the inner-loop nested within the LOOP that is now being
4137 vectorized, while it is guaranteed that the misalignment of the
4138 vectorized memory access will remain the same in different outer-loop
4139 iterations, it is *not* guaranteed that is will remain the same throughout
4140 the execution of the inner-loop. This is because the inner-loop advances
4141 with the original scalar step (and not in steps of VS). If the inner-loop
4142 step happens to be a multiple of VS, then the misalignment remains fixed
4143 and we can use the optimized realignment scheme. For example:
4145 for (i=0; i<N; i++)
4146 for (j=0; j<M; j++)
4147 s += a[i+j];
4149 When vectorizing the i-loop in the above example, the step between
4150 consecutive vector loads is 1, and so the misalignment does not remain
4151 fixed across the execution of the inner-loop, and the realignment cannot
4152 be optimized (as illustrated in the following pseudo vectorized loop):
4154 for (i=0; i<N; i+=4)
4155 for (j=0; j<M; j++){
4156 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4157 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4158 // (assuming that we start from an aligned address).
4161 We therefore have to use the unoptimized realignment scheme:
4163 for (i=0; i<N; i+=4)
4164 for (j=k; j<M; j+=4)
4165 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4166 // that the misalignment of the initial address is
4167 // 0).
4169 The loop can then be vectorized as follows:
4171 for (k=0; k<4; k++){
4172 rt = get_realignment_token (&vp[k]);
4173 for (i=0; i<N; i+=4){
4174 v1 = vp[i+k];
4175 for (j=k; j<M; j+=4){
4176 v2 = vp[i+j+VS-1];
4177 va = REALIGN_LOAD <v1,v2,rt>;
4178 vs += va;
4179 v1 = v2;
4182 } */
4184 if (DR_IS_READ (dr))
4186 bool is_packed = false;
4187 tree type = (TREE_TYPE (DR_REF (dr)));
4189 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4190 && (!targetm.vectorize.builtin_mask_for_load
4191 || targetm.vectorize.builtin_mask_for_load ()))
4193 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4194 if ((nested_in_vect_loop
4195 && (TREE_INT_CST_LOW (DR_STEP (dr))
4196 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4197 || !loop_vinfo)
4198 return dr_explicit_realign;
4199 else
4200 return dr_explicit_realign_optimized;
4202 if (!known_alignment_for_access_p (dr))
4204 tree ba = DR_BASE_OBJECT (dr);
4206 if (ba)
4207 is_packed = contains_packed_reference (ba);
4210 if (targetm.vectorize.
4211 support_vector_misalignment (mode, type,
4212 DR_MISALIGNMENT (dr), is_packed))
4213 /* Can't software pipeline the loads, but can at least do them. */
4214 return dr_unaligned_supported;
4216 else
4218 bool is_packed = false;
4219 tree type = (TREE_TYPE (DR_REF (dr)));
4221 if (!known_alignment_for_access_p (dr))
4223 tree ba = DR_BASE_OBJECT (dr);
4225 if (ba)
4226 is_packed = contains_packed_reference (ba);
4229 if (targetm.vectorize.
4230 support_vector_misalignment (mode, type,
4231 DR_MISALIGNMENT (dr), is_packed))
4232 return dr_unaligned_supported;
4235 /* Unsupported. */
4236 return dr_unaligned_unsupported;