gcc/tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
41 #include "toplev.h"
43 /* Need to include rtl.h, expr.h, etc. for optabs. */
44 #include "expr.h"
45 #include "optabs.h"
47 /* Return the smallest scalar part of STMT.
48 This is used to determine the vectype of the stmt. We generally set the
49 vectype according to the type of the result (lhs). For stmts whose
50 result-type is different than the type of the arguments (e.g., demotion,
51 promotion), vectype will be reset appropriately (later). Note that we have
52 to visit the smallest datatype in this function, because that determines the
53 VF. If the smallest datatype in the loop is present only as the rhs of a
54 promotion operation, we would miss it.
55 Such a case, where a variable of this datatype does not appear in the lhs
56 anywhere in the loop, can only occur if it's an invariant: e.g.:
57 'int_x = (int) short_inv', which we'd expect to have been optimized away by
58 invariant motion. However, we cannot rely on invariant motion to always
59 take invariants out of the loop, and so in the case of promotion we also
60 have to check the rhs.
61 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
62 types. */
64 tree
65 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
66 HOST_WIDE_INT *rhs_size_unit)
68 tree scalar_type = gimple_expr_type (stmt);
69 HOST_WIDE_INT lhs, rhs;
71 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
73 if (is_gimple_assign (stmt)
74 && (gimple_assign_cast_p (stmt)
75 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
76 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
78 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
80 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
81 if (rhs < lhs)
82 scalar_type = rhs_type;
85 *lhs_size_unit = lhs;
86 *rhs_size_unit = rhs;
87 return scalar_type;
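/* Illustrative example (added for exposition, not part of the original
   comment): in a loop such as

     short *s; int *d;
     for (i = 0; i < N; i++)
       d[i] = (int) s[i];

   the smallest scalar type is 'short', which appears only as the rhs of the
   widening cast; for that cast stmt LHS_SIZE_UNIT would be 4 and
   RHS_SIZE_UNIT 2, and the VF must be chosen according to the 2-byte type.  */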
91 /* Find the place of the data-ref in STMT in the interleaving chain that starts
92 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
94 int
95 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
97 gimple next_stmt = first_stmt;
98 int result = 0;
100 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
101 return -1;
103 while (next_stmt && next_stmt != stmt)
105 result++;
106 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
109 if (next_stmt)
110 return result;
111 else
112 return -1;
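/* For illustration (hypothetical example): given an interleaved group of
   stores p[4*i], p[4*i+1], p[4*i+2], p[4*i+3] whose chain starts at the
   p[4*i] store, the stmt writing p[4*i+2] would get place 2, and a stmt
   that is not part of the chain would yield -1.  */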
116 /* Function vect_insert_into_interleaving_chain.
118 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
120 static void
121 vect_insert_into_interleaving_chain (struct data_reference *dra,
122 struct data_reference *drb)
124 gimple prev, next;
125 tree next_init;
126 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
127 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
129 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
130 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
131 while (next)
133 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
134 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
136 /* Insert here. */
137 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
138 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
139 return;
141 prev = next;
142 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
145 /* We got to the end of the list. Insert here. */
146 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
147 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
151 /* Function vect_update_interleaving_chain.
153 For two data-refs DRA and DRB that are part of a chain of interleaved data
154 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
156 There are four possible cases:
157 1. New stmts - both DRA and DRB are not a part of any chain:
158 FIRST_DR = DRB
159 NEXT_DR (DRB) = DRA
160 2. DRB is a part of a chain and DRA is not:
161 no need to update FIRST_DR
162 no need to insert DRB
163 insert DRA according to init
164 3. DRA is a part of a chain and DRB is not:
165 if (init of FIRST_DR > init of DRB)
166 FIRST_DR = DRB
167 NEXT(FIRST_DR) = previous FIRST_DR
168 else
169 insert DRB according to its init
170 4. both DRA and DRB are in some interleaving chains:
171 choose the chain with the smallest init of FIRST_DR
172 insert the nodes of the second chain into the first one. */
174 static void
175 vect_update_interleaving_chain (struct data_reference *drb,
176 struct data_reference *dra)
178 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
179 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
180 tree next_init, init_dra_chain, init_drb_chain;
181 gimple first_a, first_b;
182 tree node_init;
183 gimple node, prev, next, first_stmt;
185 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
186 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
188 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
189 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
190 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
191 return;
194 /* 2. DRB is a part of a chain and DRA is not. */
195 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
197 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
198 /* Insert DRA into the chain of DRB. */
199 vect_insert_into_interleaving_chain (dra, drb);
200 return;
203 /* 3. DRA is a part of a chain and DRB is not. */
204 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
206 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
207 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
208 old_first_stmt)));
209 gimple tmp;
211 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
213 /* DRB's init is smaller than the init of the stmt previously marked
214 as the first stmt of the interleaving chain of DRA. Therefore, we
215 update FIRST_STMT and put DRB in the head of the list. */
216 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
217 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
219 /* Update all the stmts in the list to point to the new FIRST_STMT. */
220 tmp = old_first_stmt;
221 while (tmp)
223 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
224 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
227 else
229 /* Insert DRB in the list of DRA. */
230 vect_insert_into_interleaving_chain (drb, dra);
231 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
233 return;
236 /* 4. both DRA and DRB are in some interleaving chains. */
237 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
238 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
239 if (first_a == first_b)
240 return;
241 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
242 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
244 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
246 /* Insert the nodes of DRA chain into the DRB chain.
247 After inserting a node, continue from this node of the DRB chain (don't
248 start from the beginning). */
249 node = DR_GROUP_FIRST_DR (stmtinfo_a);
250 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
251 first_stmt = first_b;
253 else
255 /* Insert the nodes of DRB chain into the DRA chain.
256 After inserting a node, continue from this node of the DRA chain (don't
257 start from the beginning). */
258 node = DR_GROUP_FIRST_DR (stmtinfo_b);
259 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
260 first_stmt = first_a;
263 while (node)
265 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
266 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
267 while (next)
269 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
270 if (tree_int_cst_compare (next_init, node_init) > 0)
272 /* Insert here. */
273 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
274 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
275 prev = node;
276 break;
278 prev = next;
279 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
281 if (!next)
283 /* We got to the end of the list. Insert here. */
284 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
285 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
286 prev = node;
288 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
289 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
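/* Worked example (for illustration only): suppose DRA's chain holds inits
   {0, 8} and DRB's chain holds inits {4, 12}, with 4-byte elements.  The
   chain whose FIRST_DR has the smaller init (DRA's, starting at 0) is kept,
   the nodes of the other chain are inserted by increasing init, and the
   result is a single chain with inits {0, 4, 8, 12} whose members all point
   to the same FIRST_DR.  */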
294 /* Function vect_equal_offsets.
296 Check if OFFSET1 and OFFSET2 are identical expressions. */
298 static bool
299 vect_equal_offsets (tree offset1, tree offset2)
301 bool res;
303 STRIP_NOPS (offset1);
304 STRIP_NOPS (offset2);
306 if (offset1 == offset2)
307 return true;
309 if (TREE_CODE (offset1) != TREE_CODE (offset2)
310 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
311 return false;
313 res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
314 TREE_OPERAND (offset2, 0));
316 if (!res || !BINARY_CLASS_P (offset1))
317 return res;
319 res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
320 TREE_OPERAND (offset2, 1));
322 return res;
326 /* Check dependence between DRA and DRB for basic block vectorization.
327 If the accesses share same bases and offsets, we can compare their initial
328 constant offsets to decide whether they differ or not. In case of a read-
329 write dependence we check that the load is before the store to ensure that
330 vectorization will not change the order of the accesses. */
332 static bool
333 vect_drs_dependent_in_basic_block (struct data_reference *dra,
334 struct data_reference *drb)
336 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
337 gimple earlier_stmt;
339 /* We only call this function for pairs of loads and stores, but we verify
340 it here. */
341 if (DR_IS_READ (dra) == DR_IS_READ (drb))
343 if (DR_IS_READ (dra))
344 return false;
345 else
346 return true;
349 /* Check that the data-refs have same bases and offsets. If not, we can't
350 determine if they are dependent. */
351 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
352 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
353 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
354 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
355 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
356 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
357 return true;
359 /* Check the types. */
360 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
361 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
363 if (type_size_a != type_size_b
364 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
365 TREE_TYPE (DR_REF (drb))))
366 return true;
368 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
369 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
371 /* Two different locations - no dependence. */
372 if (init_a != init_b)
373 return false;
375 /* We have a read-write dependence. Check that the load is before the store.
376 When we vectorize basic blocks, a vector load can only be placed before
377 the corresponding scalar load, and a vector store can only be placed after
378 its corresponding scalar store. So the order of the accesses is preserved
379 in case the load is before the store. */
380 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
381 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
382 return false;
384 return true;
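/* Illustrative example (hypothetical): in a basic block containing the
   scalar statements
     x = a[i];    (load)
     a[i] = y;    (store)
   both refs share base, offset and init, so this is a read-write dependence;
   since the load comes before the store, the order is preserved after
   vectorization and the function returns false.  If the store came first,
   true would be returned.  */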
388 /* Function vect_check_interleaving.
390 Check if DRA and DRB are a part of interleaving. In case they are, insert
391 DRA and DRB in an interleaving chain. */
393 static bool
394 vect_check_interleaving (struct data_reference *dra,
395 struct data_reference *drb)
397 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
399 /* Check that the data-refs have same first location (except init) and they
400 are both either store or load (not load and store). */
401 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
402 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
403 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
404 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
405 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
406 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
407 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
408 || DR_IS_READ (dra) != DR_IS_READ (drb))
409 return false;
411 /* Check:
412 1. data-refs are of the same type
413 2. their steps are equal
414 3. the step (if greater than zero) is greater than the difference between
415 data-refs' inits. */
416 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
417 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
419 if (type_size_a != type_size_b
420 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
421 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
422 TREE_TYPE (DR_REF (drb))))
423 return false;
425 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
426 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
427 step = TREE_INT_CST_LOW (DR_STEP (dra));
429 if (init_a > init_b)
431 /* If init_a == init_b + the size of the type * k, we have an interleaving,
432 and DRB is accessed before DRA. */
433 diff_mod_size = (init_a - init_b) % type_size_a;
435 if (step && (init_a - init_b) > step)
436 return false;
438 if (diff_mod_size == 0)
440 vect_update_interleaving_chain (drb, dra);
441 if (vect_print_dump_info (REPORT_DR_DETAILS))
443 fprintf (vect_dump, "Detected interleaving ");
444 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
445 fprintf (vect_dump, " and ");
446 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
448 return true;
451 else
453 /* If init_b == init_a + the size of the type * k, we have an
454 interleaving, and DRA is accessed before DRB. */
455 diff_mod_size = (init_b - init_a) % type_size_a;
457 if (step && (init_b - init_a) > step)
458 return false;
460 if (diff_mod_size == 0)
462 vect_update_interleaving_chain (dra, drb);
463 if (vect_print_dump_info (REPORT_DR_DETAILS))
465 fprintf (vect_dump, "Detected interleaving ");
466 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
467 fprintf (vect_dump, " and ");
468 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
470 return true;
474 return false;
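/* Illustrative example (hypothetical): for the strided stores

     for (i = 0; i < N; i++)
       {
         p[2*i]     = x;    (init 0, 4-byte elements)
         p[2*i + 1] = y;    (init 4)
       }

   both refs have step 8 and compatible types, (init_b - init_a) is a
   multiple of the type size, and the difference (4) does not exceed the
   step, so the two data-refs would be recorded as one interleaving chain
   via vect_update_interleaving_chain.  */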
477 /* Check if the data references pointed to by DR_I and DR_J are the same or
478 belong to the same interleaving group. Return FALSE if the drs are
479 different, otherwise return TRUE. */
481 static bool
482 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
484 gimple stmt_i = DR_STMT (dr_i);
485 gimple stmt_j = DR_STMT (dr_j);
487 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
488 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
489 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
490 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
491 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
492 return true;
493 else
494 return false;
497 /* If address ranges represented by DDR_I and DDR_J are equal,
498 return TRUE, otherwise return FALSE. */
500 static bool
501 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
503 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
504 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
505 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
506 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
507 return true;
508 else
509 return false;
512 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
513 tested at run-time. Return TRUE if DDR was successfully inserted.
514 Return false if versioning is not supported. */
516 static bool
517 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
519 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
521 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
522 return false;
524 if (vect_print_dump_info (REPORT_DR_DETAILS))
526 fprintf (vect_dump, "mark for run-time aliasing test between ");
527 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
528 fprintf (vect_dump, " and ");
529 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
532 if (optimize_loop_nest_for_size_p (loop))
534 if (vect_print_dump_info (REPORT_DR_DETAILS))
535 fprintf (vect_dump, "versioning not supported when optimizing for size.");
536 return false;
539 /* FORNOW: We don't support versioning with outer-loop vectorization. */
540 if (loop->inner)
542 if (vect_print_dump_info (REPORT_DR_DETAILS))
543 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
544 return false;
547 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
548 return true;
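/* Illustrative example (hypothetical): for

     void f (int *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = b[i] + 1;
     }

   the dependence between a[i] and b[i] cannot be resolved at compile time,
   so its ddr would be pushed onto LOOP_VINFO_MAY_ALIAS_DDRS and the loop
   later versioned with a run-time overlap test, unless the loop is
   optimized for size, is an outer loop (has an inner loop), or the number
   of checks exceeds PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS.  */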
552 /* Function vect_analyze_data_ref_dependence.
554 Return TRUE if there (might) exist a dependence between a memory-reference
555 DRA and a memory-reference DRB. When versioning for alias can resolve the
556 dependence with a run-time check, return FALSE. Adjust *MAX_VF according to
557 the data dependence. */
559 static bool
560 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
561 loop_vec_info loop_vinfo, int *max_vf,
562 bool *data_dependence_in_bb)
564 unsigned int i;
565 struct loop *loop = NULL;
566 struct data_reference *dra = DDR_A (ddr);
567 struct data_reference *drb = DDR_B (ddr);
568 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
569 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
570 lambda_vector dist_v;
571 unsigned int loop_depth;
573 /* Don't bother to analyze statements marked as unvectorizable. */
574 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
575 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
576 return false;
578 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
580 /* Independent data accesses. */
581 vect_check_interleaving (dra, drb);
582 return false;
585 if (loop_vinfo)
586 loop = LOOP_VINFO_LOOP (loop_vinfo);
588 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
589 return false;
591 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
593 if (loop_vinfo)
595 if (vect_print_dump_info (REPORT_DR_DETAILS))
597 fprintf (vect_dump, "versioning for alias required: "
598 "can't determine dependence between ");
599 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
600 fprintf (vect_dump, " and ");
601 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
604 /* Add to list of ddrs that need to be tested at run-time. */
605 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
608 /* When vectorizing a basic block, an unknown dependence can still mean a
609 strided access. */
610 if (vect_check_interleaving (dra, drb))
611 return false;
613 if (vect_print_dump_info (REPORT_DR_DETAILS))
615 fprintf (vect_dump, "can't determine dependence between ");
616 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
617 fprintf (vect_dump, " and ");
618 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
621 /* We do not vectorize basic blocks with write-write dependencies. */
622 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
623 return true;
625 /* We deal with read-write dependencies in basic blocks later (by
626 verifying that all the loads in the basic block are before all the
627 stores). */
628 *data_dependence_in_bb = true;
629 return false;
632 /* Versioning for alias is not yet supported for basic block SLP, and
633 dependence distance is not applicable; hence, in case of a known data
634 dependence, basic block vectorization is impossible for now. */
635 if (!loop_vinfo)
637 if (dra != drb && vect_check_interleaving (dra, drb))
638 return false;
640 if (vect_print_dump_info (REPORT_DR_DETAILS))
642 fprintf (vect_dump, "determined dependence between ");
643 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
644 fprintf (vect_dump, " and ");
645 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
648 /* Do not vectorize basic blocks with write-write dependences. */
649 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
650 return true;
652 /* Check if this dependence is allowed in basic block vectorization. */
653 return vect_drs_dependent_in_basic_block (dra, drb);
656 /* Loop-based vectorization and known data dependence. */
657 if (DDR_NUM_DIST_VECTS (ddr) == 0)
659 if (vect_print_dump_info (REPORT_DR_DETAILS))
661 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
662 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
663 fprintf (vect_dump, " and ");
664 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
666 /* Add to list of ddrs that need to be tested at run-time. */
667 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
670 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
671 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
673 int dist = dist_v[loop_depth];
675 if (vect_print_dump_info (REPORT_DR_DETAILS))
676 fprintf (vect_dump, "dependence distance = %d.", dist);
678 if (dist == 0)
680 if (vect_print_dump_info (REPORT_DR_DETAILS))
682 fprintf (vect_dump, "dependence distance == 0 between ");
683 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
684 fprintf (vect_dump, " and ");
685 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
688 /* For interleaving, mark that there is a read-write dependency if
689 necessary. We have already checked that one of the data-refs is a store. */
690 if (DR_IS_READ (dra))
691 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
692 else
694 if (DR_IS_READ (drb))
695 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
698 continue;
701 if (dist > 0 && DDR_REVERSED_P (ddr))
703 /* If DDR_REVERSED_P is set, the order of the data-refs in DDR was
704 reversed (to make distance vector positive), and the actual
705 distance is negative. */
706 if (vect_print_dump_info (REPORT_DR_DETAILS))
707 fprintf (vect_dump, "dependence distance negative.");
708 continue;
711 if (abs (dist) >= 2
712 && abs (dist) < *max_vf)
714 /* The dependence distance requires reduction of the maximal
715 vectorization factor. */
716 *max_vf = abs (dist);
717 if (vect_print_dump_info (REPORT_DR_DETAILS))
718 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
719 *max_vf);
722 if (abs (dist) >= *max_vf)
724 /* In this case the dependence distance does not create a dependence,
725 as far as vectorization is concerned. */
726 if (vect_print_dump_info (REPORT_DR_DETAILS))
727 fprintf (vect_dump, "dependence distance >= VF.");
728 continue;
731 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
733 fprintf (vect_dump, "not vectorized, possible dependence "
734 "between data-refs ");
735 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
736 fprintf (vect_dump, " and ");
737 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
740 return true;
743 return false;
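/* Worked example (for illustration only): for

     for (i = 0; i < N; i++)
       a[i + 3] = a[i] + x;

   the dependence distance is 3.  Since abs(dist) >= 2 and is smaller than
   the current *max_vf, *max_vf is reduced to 3, after which the distance no
   longer constrains vectorization with a VF of at most 3.  */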
746 /* Function vect_analyze_data_ref_dependences.
748 Examine all the data references in the loop, and make sure there do not
749 exist any data dependences between them. Set *MAX_VF according to
750 the maximum vectorization factor the data dependences allow. */
752 bool
753 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
754 bb_vec_info bb_vinfo, int *max_vf,
755 bool *data_dependence_in_bb)
757 unsigned int i;
758 VEC (ddr_p, heap) *ddrs = NULL;
759 struct data_dependence_relation *ddr;
761 if (vect_print_dump_info (REPORT_DETAILS))
762 fprintf (vect_dump, "=== vect_analyze_dependences ===");
764 if (loop_vinfo)
765 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
766 else
767 ddrs = BB_VINFO_DDRS (bb_vinfo);
769 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
770 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
771 data_dependence_in_bb))
772 return false;
774 return true;
778 /* Function vect_compute_data_ref_alignment
780 Compute the misalignment of the data reference DR.
782 Output:
783 1. If during the misalignment computation it is found that the data reference
784 cannot be vectorized then false is returned.
785 2. DR_MISALIGNMENT (DR) is defined.
787 FOR NOW: No analysis is actually performed. Misalignment is calculated
788 only for trivial cases. TODO. */
790 static bool
791 vect_compute_data_ref_alignment (struct data_reference *dr)
793 gimple stmt = DR_STMT (dr);
794 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
795 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
796 struct loop *loop = NULL;
797 tree ref = DR_REF (dr);
798 tree vectype;
799 tree base, base_addr;
800 bool base_aligned;
801 tree misalign;
802 tree aligned_to, alignment;
804 if (vect_print_dump_info (REPORT_DETAILS))
805 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
807 if (loop_vinfo)
808 loop = LOOP_VINFO_LOOP (loop_vinfo);
810 /* Initialize misalignment to unknown. */
811 SET_DR_MISALIGNMENT (dr, -1);
813 misalign = DR_INIT (dr);
814 aligned_to = DR_ALIGNED_TO (dr);
815 base_addr = DR_BASE_ADDRESS (dr);
816 vectype = STMT_VINFO_VECTYPE (stmt_info);
818 /* In case the dataref is in an inner-loop of the loop that is being
819 vectorized (LOOP), we use the base and misalignment information
820 relative to the outer-loop (LOOP). This is ok only if the misalignment
821 stays the same throughout the execution of the inner-loop, which is why
822 we have to check that the stride of the dataref in the inner-loop is
823 divisible by the vector size. */
824 if (loop && nested_in_vect_loop_p (loop, stmt))
826 tree step = DR_STEP (dr);
827 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
829 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
831 if (vect_print_dump_info (REPORT_ALIGNMENT))
832 fprintf (vect_dump, "inner step divides the vector-size.");
833 misalign = STMT_VINFO_DR_INIT (stmt_info);
834 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
835 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
837 else
839 if (vect_print_dump_info (REPORT_ALIGNMENT))
840 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
841 misalign = NULL_TREE;
845 base = build_fold_indirect_ref (base_addr);
846 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
848 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
849 || !misalign)
851 if (vect_print_dump_info (REPORT_ALIGNMENT))
853 fprintf (vect_dump, "Unknown alignment for access: ");
854 print_generic_expr (vect_dump, base, TDF_SLIM);
856 return true;
859 if ((DECL_P (base)
860 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
861 alignment) >= 0)
862 || (TREE_CODE (base_addr) == SSA_NAME
863 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
864 TREE_TYPE (base_addr)))),
865 alignment) >= 0))
866 base_aligned = true;
867 else
868 base_aligned = false;
870 if (!base_aligned)
872 /* Do not change the alignment of global variables if
873 flag_section_anchors is enabled. */
874 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
875 || (TREE_STATIC (base) && flag_section_anchors))
877 if (vect_print_dump_info (REPORT_DETAILS))
879 fprintf (vect_dump, "can't force alignment of ref: ");
880 print_generic_expr (vect_dump, ref, TDF_SLIM);
882 return true;
885 /* Force the alignment of the decl.
886 NOTE: This is the only change to the code we make during
887 the analysis phase, before deciding to vectorize the loop. */
888 if (vect_print_dump_info (REPORT_DETAILS))
890 fprintf (vect_dump, "force alignment of ");
891 print_generic_expr (vect_dump, ref, TDF_SLIM);
894 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
895 DECL_USER_ALIGN (base) = 1;
898 /* At this point we assume that the base is aligned. */
899 gcc_assert (base_aligned
900 || (TREE_CODE (base) == VAR_DECL
901 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
903 /* Modulo alignment. */
904 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
906 if (!host_integerp (misalign, 1))
908 /* Negative or overflowed misalignment value. */
909 if (vect_print_dump_info (REPORT_DETAILS))
910 fprintf (vect_dump, "unexpected misalign value");
911 return false;
914 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
916 if (vect_print_dump_info (REPORT_DETAILS))
918 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
919 print_generic_expr (vect_dump, ref, TDF_SLIM);
922 return true;
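/* Worked example (for illustration only): for a data-ref with
   DR_BASE_ADDRESS = &a, DR_INIT = 12 and a V4SI vectype (16-byte
   alignment), assuming 'a' is (or can be forced to be) 16-byte aligned,
   the misalignment is 12 % 16 = 12 bytes; an init of 32 would give
   DR_MISALIGNMENT = 0.  If DR_ALIGNED_TO is smaller than the vector
   alignment, the misalignment is left as unknown (-1).  */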
926 /* Function vect_compute_data_refs_alignment
928 Compute the misalignment of data references in the loop.
929 Return FALSE if a data reference is found that cannot be vectorized. */
931 static bool
932 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
933 bb_vec_info bb_vinfo)
935 VEC (data_reference_p, heap) *datarefs;
936 struct data_reference *dr;
937 unsigned int i;
939 if (loop_vinfo)
940 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
941 else
942 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
944 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
945 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
946 && !vect_compute_data_ref_alignment (dr))
948 if (bb_vinfo)
950 /* Mark unsupported statement as unvectorizable. */
951 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
952 continue;
954 else
955 return false;
958 return true;
962 /* Function vect_update_misalignment_for_peel
964 DR - the data reference whose misalignment is to be adjusted.
965 DR_PEEL - the data reference whose misalignment is being made
966 zero in the vector loop by the peel.
967 NPEEL - the number of iterations in the peel loop if the misalignment
968 of DR_PEEL is known at compile time. */
970 static void
971 vect_update_misalignment_for_peel (struct data_reference *dr,
972 struct data_reference *dr_peel, int npeel)
974 unsigned int i;
975 VEC(dr_p,heap) *same_align_drs;
976 struct data_reference *current_dr;
977 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
978 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
979 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
980 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
982 /* For interleaved data accesses the step in the loop must be multiplied by
983 the size of the interleaving group. */
984 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
985 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
986 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
987 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
989 /* It can be assumed that the data refs with the same alignment as dr_peel
990 are aligned in the vector loop. */
991 same_align_drs
992 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
993 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
995 if (current_dr != dr)
996 continue;
997 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
998 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
999 SET_DR_MISALIGNMENT (dr, 0);
1000 return;
1003 if (known_alignment_for_access_p (dr)
1004 && known_alignment_for_access_p (dr_peel))
1006 int misal = DR_MISALIGNMENT (dr);
1007 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1008 misal += npeel * dr_size;
1009 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
1010 SET_DR_MISALIGNMENT (dr, misal);
1011 return;
1014 if (vect_print_dump_info (REPORT_DETAILS))
1015 fprintf (vect_dump, "Setting misalignment to -1.");
1016 SET_DR_MISALIGNMENT (dr, -1);
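/* Worked example (for illustration only): if DR has a known misalignment of
   8 bytes, its element size is 4, NPEEL is 2 and the vector size is 16, the
   new misalignment is (8 + 2*4) % 16 = 0.  A DR recorded in DR_PEEL's
   same-alignment list is simply set to 0, and if either misalignment is
   unknown DR's misalignment is reset to -1.  */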
1020 /* Function vect_verify_datarefs_alignment
1022 Return TRUE if all data references in the loop can be
1023 handled with respect to alignment. */
1025 bool
1026 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1028 VEC (data_reference_p, heap) *datarefs;
1029 struct data_reference *dr;
1030 enum dr_alignment_support supportable_dr_alignment;
1031 unsigned int i;
1033 if (loop_vinfo)
1034 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1035 else
1036 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1038 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1040 gimple stmt = DR_STMT (dr);
1041 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1043 /* For interleaving, only the alignment of the first access matters.
1044 Skip statements marked as not vectorizable. */
1045 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1046 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1047 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1048 continue;
1050 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1051 if (!supportable_dr_alignment)
1053 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1055 if (DR_IS_READ (dr))
1056 fprintf (vect_dump,
1057 "not vectorized: unsupported unaligned load.");
1058 else
1059 fprintf (vect_dump,
1060 "not vectorized: unsupported unaligned store.");
1062 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1064 return false;
1066 if (supportable_dr_alignment != dr_aligned
1067 && vect_print_dump_info (REPORT_ALIGNMENT))
1068 fprintf (vect_dump, "Vectorizing an unaligned access.");
1070 return true;
1074 /* Function vector_alignment_reachable_p
1076 Return true if vector alignment for DR is reachable by peeling
1077 a few loop iterations. Return false otherwise. */
1079 static bool
1080 vector_alignment_reachable_p (struct data_reference *dr)
1082 gimple stmt = DR_STMT (dr);
1083 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1084 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1086 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1088 /* For interleaved access we peel only if the number of iterations in
1089 the prolog loop ({VF - misalignment}) is a multiple of the
1090 number of interleaved accesses. */
1091 int elem_size, mis_in_elements;
1092 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1094 /* FORNOW: handle only known alignment. */
1095 if (!known_alignment_for_access_p (dr))
1096 return false;
1098 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1099 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1101 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1102 return false;
1105 /* If the misalignment is known at compile time then allow peeling
1106 only if natural alignment is reachable through peeling. */
1107 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1109 HOST_WIDE_INT elmsize =
1110 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1111 if (vect_print_dump_info (REPORT_DETAILS))
1113 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1114 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1116 if (DR_MISALIGNMENT (dr) % elmsize)
1118 if (vect_print_dump_info (REPORT_DETAILS))
1119 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1120 return false;
1124 if (!known_alignment_for_access_p (dr))
1126 tree type = (TREE_TYPE (DR_REF (dr)));
1127 tree ba = DR_BASE_OBJECT (dr);
1128 bool is_packed = false;
1130 if (ba)
1131 is_packed = contains_packed_reference (ba);
1133 if (vect_print_dump_info (REPORT_DETAILS))
1134 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1135 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1136 return true;
1137 else
1138 return false;
1141 return true;
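/* Illustrative example (hypothetical): a 4-byte element access with a known
   misalignment of 8 bytes against a 16-byte vector can be aligned by
   peeling, since the element size divides the misalignment (two scalar
   iterations are peeled, as computed elsewhere); a misalignment of 6 bytes
   cannot.  For an interleaved group the number of peeled iterations must
   additionally be a multiple of the group size.  */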
1145 /* Calculate the cost of the memory access represented by DR. */
1147 static void
1148 vect_get_data_access_cost (struct data_reference *dr,
1149 unsigned int *inside_cost,
1150 unsigned int *outside_cost)
1152 gimple stmt = DR_STMT (dr);
1153 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1154 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1155 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1156 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1157 int ncopies = vf / nunits;
1158 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1160 if (!supportable_dr_alignment)
1161 *inside_cost = VECT_MAX_COST;
1162 else
1164 if (DR_IS_READ (dr))
1165 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1166 else
1167 vect_get_store_cost (dr, ncopies, inside_cost);
1170 if (vect_print_dump_info (REPORT_COST))
1171 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1172 "outside_cost = %d.", *inside_cost, *outside_cost);
1176 static hashval_t
1177 vect_peeling_hash (const void *elem)
1179 const struct _vect_peel_info *peel_info;
1181 peel_info = (const struct _vect_peel_info *) elem;
1182 return (hashval_t) peel_info->npeel;
1186 static int
1187 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1189 const struct _vect_peel_info *a, *b;
1191 a = (const struct _vect_peel_info *) elem1;
1192 b = (const struct _vect_peel_info *) elem2;
1193 return (a->npeel == b->npeel);
1197 /* Insert DR into peeling hash table with NPEEL as key. */
1199 static void
1200 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1201 int npeel)
1203 struct _vect_peel_info elem, *slot;
1204 void **new_slot;
1205 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1207 elem.npeel = npeel;
1208 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1209 &elem);
1210 if (slot)
1211 slot->count++;
1212 else
1214 slot = XNEW (struct _vect_peel_info);
1215 slot->npeel = npeel;
1216 slot->dr = dr;
1217 slot->count = 1;
1218 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1219 INSERT);
1220 *new_slot = slot;
1223 if (!supportable_dr_alignment && !flag_vect_cost_model)
1224 slot->count += VECT_MAX_COST;
1228 /* Traverse the peeling hash table to find the peeling option that aligns
1229 the maximum number of data accesses. */
1231 static int
1232 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1234 vect_peel_info elem = (vect_peel_info) *slot;
1235 vect_peel_extended_info max = (vect_peel_extended_info) data;
1237 if (elem->count > max->peel_info.count)
1239 max->peel_info.npeel = elem->npeel;
1240 max->peel_info.count = elem->count;
1241 max->peel_info.dr = elem->dr;
1244 return 1;
1248 /* Traverse peeling hash table and calculate cost for each peeling option.
1249 Find the one with the lowest cost. */
1251 static int
1252 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1254 vect_peel_info elem = (vect_peel_info) *slot;
1255 vect_peel_extended_info min = (vect_peel_extended_info) data;
1256 int save_misalignment, dummy;
1257 unsigned int inside_cost = 0, outside_cost = 0, i;
1258 gimple stmt = DR_STMT (elem->dr);
1259 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1260 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1261 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1262 struct data_reference *dr;
1264 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1266 stmt = DR_STMT (dr);
1267 stmt_info = vinfo_for_stmt (stmt);
1268 /* For interleaving, only the alignment of the first access
1269 matters. */
1270 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1271 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1272 continue;
1274 save_misalignment = DR_MISALIGNMENT (dr);
1275 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1276 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1277 SET_DR_MISALIGNMENT (dr, save_misalignment);
1280 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1281 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1283 if (inside_cost < min->inside_cost
1284 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1286 min->inside_cost = inside_cost;
1287 min->outside_cost = outside_cost;
1288 min->peel_info.dr = elem->dr;
1289 min->peel_info.npeel = elem->npeel;
1292 return 1;
1296 /* Choose best peeling option by traversing peeling hash table and either
1297 choosing an option with the lowest cost (if cost model is enabled) or the
1298 option that aligns as many accesses as possible. */
1300 static struct data_reference *
1301 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1302 unsigned int *npeel)
1304 struct _vect_peel_extended_info res;
1306 res.peel_info.dr = NULL;
1308 if (flag_vect_cost_model)
1310 res.inside_cost = INT_MAX;
1311 res.outside_cost = INT_MAX;
1312 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1313 vect_peeling_hash_get_lowest_cost, &res);
1315 else
1317 res.peel_info.count = 0;
1318 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1319 vect_peeling_hash_get_most_frequent, &res);
1322 *npeel = res.peel_info.npeel;
1323 return res.peel_info.dr;
1327 /* Function vect_enhance_data_refs_alignment
1329 This pass will use loop versioning and loop peeling in order to enhance
1330 the alignment of data references in the loop.
1332 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1333 original loop is to be vectorized. Any other loops that are created by
1334 the transformations performed in this pass are not supposed to be
1335 vectorized. This restriction will be relaxed.
1337 This pass will require a cost model to guide it whether to apply peeling
1338 or versioning or a combination of the two. For example, the scheme that
1339 Intel uses when given a loop with several memory accesses, is as follows:
1340 choose one memory access ('p') whose alignment you want to force by doing
1341 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1342 other accesses are not necessarily aligned, or (2) use loop versioning to
1343 generate one loop in which all accesses are aligned, and another loop in
1344 which only 'p' is necessarily aligned.
1346 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1347 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1348 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1350 Devising a cost model is the most critical aspect of this work. It will
1351 guide us on which access to peel for, whether to use loop versioning, how
1352 many versions to create, etc. The cost model will probably consist of
1353 generic considerations as well as target specific considerations (on
1354 powerpc for example, misaligned stores are more painful than misaligned
1355 loads).
1357 Here are the general steps involved in alignment enhancements:
1359 -- original loop, before alignment analysis:
1360 for (i=0; i<N; i++){
1361 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1362 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1365 -- After vect_compute_data_refs_alignment:
1366 for (i=0; i<N; i++){
1367 x = q[i]; # DR_MISALIGNMENT(q) = 3
1368 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1371 -- Possibility 1: we do loop versioning:
1372 if (p is aligned) {
1373 for (i=0; i<N; i++){ # loop 1A
1374 x = q[i]; # DR_MISALIGNMENT(q) = 3
1375 p[i] = y; # DR_MISALIGNMENT(p) = 0
1378 else {
1379 for (i=0; i<N; i++){ # loop 1B
1380 x = q[i]; # DR_MISALIGNMENT(q) = 3
1381 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1385 -- Possibility 2: we do loop peeling:
1386 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1387 x = q[i];
1388 p[i] = y;
1390 for (i = 3; i < N; i++){ # loop 2A
1391 x = q[i]; # DR_MISALIGNMENT(q) = 0
1392 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1395 -- Possibility 3: combination of loop peeling and versioning:
1396 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1397 x = q[i];
1398 p[i] = y;
1400 if (p is aligned) {
1401 for (i = 3; i<N; i++){ # loop 3A
1402 x = q[i]; # DR_MISALIGNMENT(q) = 0
1403 p[i] = y; # DR_MISALIGNMENT(p) = 0
1406 else {
1407 for (i = 3; i<N; i++){ # loop 3B
1408 x = q[i]; # DR_MISALIGNMENT(q) = 0
1409 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1413 These loops are later passed to loop_transform to be vectorized. The
1414 vectorizer will use the alignment information to guide the transformation
1415 (whether to generate regular loads/stores, or with special handling for
1416 misalignment). */
1418 bool
1419 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1421 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1422 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1423 enum dr_alignment_support supportable_dr_alignment;
1424 struct data_reference *dr0 = NULL, *first_store = NULL;
1425 struct data_reference *dr;
1426 unsigned int i, j;
1427 bool do_peeling = false;
1428 bool do_versioning = false;
1429 bool stat;
1430 gimple stmt;
1431 stmt_vec_info stmt_info;
1432 int vect_versioning_for_alias_required;
1433 unsigned int npeel = 0;
1434 bool all_misalignments_unknown = true;
1435 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1436 unsigned possible_npeel_number = 1;
1437 tree vectype;
1438 unsigned int nelements, mis, same_align_drs_max = 0;
1440 if (vect_print_dump_info (REPORT_DETAILS))
1441 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1443 /* While cost model enhancements are expected in the future, the high level
1444 view of the code at this time is as follows:
1446 A) If there is a misaligned access then see if peeling to align
1447 this access can make all data references satisfy
1448 vect_supportable_dr_alignment. If so, update data structures
1449 as needed and return true.
1451 B) If peeling wasn't possible and there is a data reference with an
1452 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1453 then see if loop versioning checks can be used to make all data
1454 references satisfy vect_supportable_dr_alignment. If so, update
1455 data structures as needed and return true.
1457 C) If neither peeling nor versioning were successful then return false if
1458 any data reference does not satisfy vect_supportable_dr_alignment.
1460 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1462 Note, Possibility 3 above (which is peeling and versioning together) is not
1463 being done at this time. */
1465 /* (1) Peeling to force alignment. */
1467 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1468 Considerations:
1469 + How many accesses will become aligned due to the peeling
1470 - How many accesses will become unaligned due to the peeling,
1471 and the cost of misaligned accesses.
1472 - The cost of peeling (the extra runtime checks, the increase
1473 in code size). */
1475 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1477 stmt = DR_STMT (dr);
1478 stmt_info = vinfo_for_stmt (stmt);
1480 /* For interleaving, only the alignment of the first access
1481 matters. */
1482 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1483 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1484 continue;
1486 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1487 do_peeling = vector_alignment_reachable_p (dr);
1488 if (do_peeling)
1490 if (known_alignment_for_access_p (dr))
1492 unsigned int npeel_tmp;
1494 /* Save info about DR in the hash table. */
1495 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1496 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1497 htab_create (1, vect_peeling_hash,
1498 vect_peeling_hash_eq, free);
1500 vectype = STMT_VINFO_VECTYPE (stmt_info);
1501 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1502 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1503 TREE_TYPE (DR_REF (dr))));
1504 npeel_tmp = (nelements - mis) % vf;
1506 /* For multiple types, it is possible that the bigger type access
1507 will have more than one peeling option. E.g., a loop with two
1508 types: one of size (vector size / 4), and the other one of
1509 size (vector size / 8). The vectorization factor will be 8. If both
1510 accesses are misaligned by 3, the first one needs one scalar
1511 iteration to be aligned, and the second one needs 5. But the
1512 first one will also be aligned by peeling 5 scalar
1513 iterations, and in that case both accesses will be aligned.
1514 Hence, besides the immediate peeling amount, we also want
1515 to try to add a full vector size, as long as we don't exceed the
1516 vectorization factor.
1517 We do this automatically for the cost model, since we calculate the cost
1518 for every peeling option. */
1519 if (!flag_vect_cost_model)
1520 possible_npeel_number = vf /nelements;
1522 /* Handle the aligned case. We may decide to align some other
1523 access, making DR unaligned. */
1524 if (DR_MISALIGNMENT (dr) == 0)
1526 npeel_tmp = 0;
1527 if (!flag_vect_cost_model)
1528 possible_npeel_number++;
1531 for (j = 0; j < possible_npeel_number; j++)
1533 gcc_assert (npeel_tmp <= vf);
1534 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1535 npeel_tmp += nelements;
1538 all_misalignments_unknown = false;
1539 /* Data-ref that was chosen for the case that all the
1540 misalignments are unknown is not relevant anymore, since we
1541 have a data-ref with known alignment. */
1542 dr0 = NULL;
1544 else
1546 /* If we don't know all the misalignment values, we prefer
1547 peeling for the data-ref that has the maximum number of data-refs
1548 with the same alignment, unless the target prefers to align
1549 stores over loads. */
1550 if (all_misalignments_unknown)
1552 if (same_align_drs_max < VEC_length (dr_p,
1553 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1554 || !dr0)
1556 same_align_drs_max = VEC_length (dr_p,
1557 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1558 dr0 = dr;
1561 if (!first_store && DR_IS_WRITE (dr))
1562 first_store = dr;
1565 /* If there are both known and unknown misaligned accesses in the
1566 loop, we choose the peeling amount according to the known
1567 accesses. */
1570 if (!supportable_dr_alignment)
1572 dr0 = dr;
1573 if (!first_store && DR_IS_WRITE (dr))
1574 first_store = dr;
1578 else
1580 if (!aligned_access_p (dr))
1582 if (vect_print_dump_info (REPORT_DETAILS))
1583 fprintf (vect_dump, "vector alignment may not be reachable");
1585 break;
1590 vect_versioning_for_alias_required
1591 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1593 /* Temporarily, if versioning for alias is required, we disable peeling
1594 until we support peeling and versioning. Often peeling for alignment
1595 will require peeling for loop-bound, which in turn requires that we
1596 know how to adjust the loop ivs after the loop. */
1597 if (vect_versioning_for_alias_required
1598 || !vect_can_advance_ivs_p (loop_vinfo)
1599 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1600 do_peeling = false;
1602 if (do_peeling && all_misalignments_unknown
1603 && vect_supportable_dr_alignment (dr0, false))
1606 /* Check if the target requires preferring stores over loads, i.e., if
1607 misaligned stores are more expensive than misaligned loads (taking
1608 drs with same alignment into account). */
1609 if (first_store && DR_IS_READ (dr0))
1611 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1612 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1613 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1614 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1616 vect_get_data_access_cost (dr0, &load_inside_cost,
1617 &load_outside_cost);
1618 vect_get_data_access_cost (first_store, &store_inside_cost,
1619 &store_outside_cost);
1621 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1622 aligning the load DR0). */
1623 load_inside_penalty = store_inside_cost;
1624 load_outside_penalty = store_outside_cost;
1625 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1626 (vinfo_for_stmt (DR_STMT (first_store))),
1627 i, dr);
1628 i++)
1629 if (DR_IS_READ (dr))
1631 load_inside_penalty += load_inside_cost;
1632 load_outside_penalty += load_outside_cost;
1634 else
1636 load_inside_penalty += store_inside_cost;
1637 load_outside_penalty += store_outside_cost;
1640 /* Calculate the penalty for leaving DR0 unaligned (by
1641 aligning the FIRST_STORE). */
1642 store_inside_penalty = load_inside_cost;
1643 store_outside_penalty = load_outside_cost;
1644 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1645 (vinfo_for_stmt (DR_STMT (dr0))),
1646 i, dr);
1647 i++)
1648 if (DR_IS_READ (dr))
1650 store_inside_penalty += load_inside_cost;
1651 store_outside_penalty += load_outside_cost;
1653 else
1655 store_inside_penalty += store_inside_cost;
1656 store_outside_penalty += store_outside_cost;
1659 if (load_inside_penalty > store_inside_penalty
1660 || (load_inside_penalty == store_inside_penalty
1661 && load_outside_penalty > store_outside_penalty))
1662 dr0 = first_store;
1665 /* In case there are only loads with different unknown misalignments, use
1666 peeling only if it may help to align other accesses in the loop. */
1667 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1668 (vinfo_for_stmt (DR_STMT (dr0))))
1669 && vect_supportable_dr_alignment (dr0, false)
1670 != dr_unaligned_supported)
1671 do_peeling = false;
1674 if (do_peeling && !dr0)
1676 /* Peeling is possible, but there is no data access that is not supported
1677 unless aligned. So we try to choose the best possible peeling. */
1679 /* We should get here only if there are drs with known misalignment. */
1680 gcc_assert (!all_misalignments_unknown);
1682 /* Choose the best peeling from the hash table. */
1683 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1684 if (!dr0 || !npeel)
1685 do_peeling = false;
1688 if (do_peeling)
1690 stmt = DR_STMT (dr0);
1691 stmt_info = vinfo_for_stmt (stmt);
1692 vectype = STMT_VINFO_VECTYPE (stmt_info);
1693 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1695 if (known_alignment_for_access_p (dr0))
1697 if (!npeel)
1699 /* Since it's known at compile time, compute the number of
1700 iterations in the peeled loop (the peeling factor) for use in
1701 updating DR_MISALIGNMENT values. The peeling factor is the
1702 vectorization factor minus the misalignment as an element
1703 count. */
1704 mis = DR_MISALIGNMENT (dr0);
1705 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1706 npeel = nelements - mis;
1709 /* For interleaved data access every iteration accesses all the
1710 members of the group, therefore we divide the number of iterations
1711 by the group size. */
1712 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1713 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1714 npeel /= DR_GROUP_SIZE (stmt_info);
1716 if (vect_print_dump_info (REPORT_DETAILS))
1717 fprintf (vect_dump, "Try peeling by %d", npeel);
1720 /* Ensure that all data refs can be vectorized after the peel. */
1721 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1723 int save_misalignment;
1725 if (dr == dr0)
1726 continue;
1728 stmt = DR_STMT (dr);
1729 stmt_info = vinfo_for_stmt (stmt);
1730 /* For interleaving, only the alignment of the first access
1731 matters. */
1732 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1733 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1734 continue;
1736 save_misalignment = DR_MISALIGNMENT (dr);
1737 vect_update_misalignment_for_peel (dr, dr0, npeel);
1738 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1739 SET_DR_MISALIGNMENT (dr, save_misalignment);
1741 if (!supportable_dr_alignment)
1743 do_peeling = false;
1744 break;
1748 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1750 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1751 if (!stat)
1752 do_peeling = false;
1753 else
1754 return stat;
1757 if (do_peeling)
1759 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1760 If the misalignment of DR_i is identical to that of dr0 then set
1761 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1762 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1763 by the peeling factor times the element size of DR_i (MOD the
1764 vectorization factor times the size). Otherwise, the
1765 misalignment of DR_i must be set to unknown. */
1766 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1767 if (dr != dr0)
1768 vect_update_misalignment_for_peel (dr, dr0, npeel);
1770 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1771 if (npeel)
1772 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1773 else
1774 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1775 SET_DR_MISALIGNMENT (dr0, 0);
1776 if (vect_print_dump_info (REPORT_ALIGNMENT))
1777 fprintf (vect_dump, "Alignment of access forced using peeling.");
1779 if (vect_print_dump_info (REPORT_DETAILS))
1780 fprintf (vect_dump, "Peeling for alignment will be applied.");
1782 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1783 gcc_assert (stat);
1784 return stat;
1789 /* (2) Versioning to force alignment. */
1791 /* Try versioning if:
1792 1) flag_tree_vect_loop_version is TRUE
1793 2) optimize loop for speed
1794 3) there is at least one unsupported misaligned data ref with an unknown
1795 misalignment, and
1796 4) all misaligned data refs with a known misalignment are supported, and
1797 5) the number of runtime alignment checks is within reason. */
1799 do_versioning =
1800 flag_tree_vect_loop_version
1801 && optimize_loop_nest_for_speed_p (loop)
1802 && (!loop->inner); /* FORNOW */
1804 if (do_versioning)
1806 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1808 stmt = DR_STMT (dr);
1809 stmt_info = vinfo_for_stmt (stmt);
1811 /* For interleaving, only the alignment of the first access
1812 matters. */
1813 if (aligned_access_p (dr)
1814 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1815 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1816 continue;
1818 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1820 if (!supportable_dr_alignment)
1822 gimple stmt;
1823 int mask;
1824 tree vectype;
1826 if (known_alignment_for_access_p (dr)
1827 || VEC_length (gimple,
1828 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1829 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1831 do_versioning = false;
1832 break;
1835 stmt = DR_STMT (dr);
1836 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1837 gcc_assert (vectype);
1839 /* The rightmost bits of an aligned address must be zeros.
1840 Construct the mask needed for this test. For example,
1841 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1842 mask must be 15 = 0xf. */
1843 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
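              /* Illustrative sketch of how MASK is used at run time
                 (assumed, simplified): the versioning condition emitted
                 later effectively tests

                   if (((addr_1 | addr_2 | ... | addr_n) & mask) == 0)
                     ... vectorized loop ...
                   else
                     ... scalar loop ...

                 so a single AND with MASK checks all potentially
                 misaligned references at once.  */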
1845 /* FORNOW: use the same mask to test all potentially unaligned
1846 references in the loop. The vectorizer currently supports
1847 a single vector size, see the reference to
1848 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1849 vectorization factor is computed. */
1850 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1851 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1852 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1853 VEC_safe_push (gimple, heap,
1854 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1855 DR_STMT (dr));
1859 /* Versioning requires at least one misaligned data reference. */
1860 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1861 do_versioning = false;
1862 else if (!do_versioning)
1863 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1866 if (do_versioning)
1868 VEC(gimple,heap) *may_misalign_stmts
1869 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1870 gimple stmt;
1872 /* It can now be assumed that the data references in the statements
1873 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1874 of the loop being vectorized. */
1875 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1877 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1878 dr = STMT_VINFO_DATA_REF (stmt_info);
1879 SET_DR_MISALIGNMENT (dr, 0);
1880 if (vect_print_dump_info (REPORT_ALIGNMENT))
1881 fprintf (vect_dump, "Alignment of access forced using versioning.");
1884 if (vect_print_dump_info (REPORT_DETAILS))
1885 fprintf (vect_dump, "Versioning for alignment will be applied.");
1887 /* Peeling and versioning can't be done together at this time. */
1888 gcc_assert (! (do_peeling && do_versioning));
1890 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1891 gcc_assert (stat);
1892 return stat;
1895 /* This point is reached if neither peeling nor versioning is being done. */
1896 gcc_assert (! (do_peeling || do_versioning));
1898 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1899 return stat;
1903 /* Function vect_find_same_alignment_drs.
1905 Update group and alignment relations according to the chosen
1906 vectorization factor. */
1908 static void
1909 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1910 loop_vec_info loop_vinfo)
1912 unsigned int i;
1913 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1914 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1915 struct data_reference *dra = DDR_A (ddr);
1916 struct data_reference *drb = DDR_B (ddr);
1917 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1918 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1919 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1920 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1921 lambda_vector dist_v;
1922 unsigned int loop_depth;
1924 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1925 return;
1927 if (dra == drb)
1928 return;
1930 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1931 return;
1933 /* Loop-based vectorization and known data dependence. */
1934 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1935 return;
1937 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1938 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1940 int dist = dist_v[loop_depth];
1942 if (vect_print_dump_info (REPORT_DR_DETAILS))
1943 fprintf (vect_dump, "dependence distance = %d.", dist);
1945 /* Same loop iteration. */
1946 if (dist == 0
1947 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1949 /* Refs with distance zero, or a multiple of VF with equal element size, have the same alignment.  */
1950 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1951 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1952 if (vect_print_dump_info (REPORT_ALIGNMENT))
1953 fprintf (vect_dump, "accesses have the same alignment.");
1954 if (vect_print_dump_info (REPORT_DR_DETAILS))
1956 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1957 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1958 fprintf (vect_dump, " and ");
1959 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
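/* Illustrative example (assumed): for 4-byte int accesses a[i] and a[i+4]
   with a vectorization factor of 4, the dependence distance is 4;
   4 % 4 == 0 and both elements have the same size, so the two refs are
   recorded as having the same alignment: they are exactly one vector
   apart, so any peeling that aligns one also aligns the other.  */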
1966 /* Function vect_analyze_data_refs_alignment
1968 Analyze the alignment of the data-references in the loop.
1969 Return FALSE if a data reference is found that cannot be vectorized. */
1971 bool
1972 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1973 bb_vec_info bb_vinfo)
1975 if (vect_print_dump_info (REPORT_DETAILS))
1976 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1978 /* Mark groups of data references with same alignment using
1979 data dependence information. */
1980 if (loop_vinfo)
1982 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1983 struct data_dependence_relation *ddr;
1984 unsigned int i;
1986 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
1987 vect_find_same_alignment_drs (ddr, loop_vinfo);
1990 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1992 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1993 fprintf (vect_dump,
1994 "not vectorized: can't calculate alignment for data ref.");
1995 return false;
1998 return true;
2002 /* Analyze groups of strided accesses: check that DR belongs to a group of
2003 strided accesses of legal size, step, etc. Detect gaps, single element
2004 interleaving, and other special cases. Set strided access info.
2005 Collect groups of strided stores for further use in SLP analysis. */
2007 static bool
2008 vect_analyze_group_access (struct data_reference *dr)
2010 tree step = DR_STEP (dr);
2011 tree scalar_type = TREE_TYPE (DR_REF (dr));
2012 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2013 gimple stmt = DR_STMT (dr);
2014 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2015 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2016 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2017 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2018 HOST_WIDE_INT stride;
2019 bool slp_impossible = false;
2021 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2022 interleaving group (including gaps). */
2023 stride = dr_step / type_size;
2025 /* A non-consecutive access is possible only if it is part of an interleaving group.  */
2026 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2028 /* Check if this DR is part of an interleaving group, and is a single
2029    element of the group that is accessed in the loop.  */
2031 /* Gaps are supported only for loads. STEP must be a multiple of the type
2032 size. The size of the group must be a power of 2. */
2033 if (DR_IS_READ (dr)
2034 && (dr_step % type_size) == 0
2035 && stride > 0
2036 && exact_log2 (stride) != -1)
2038 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2039 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2040 if (vect_print_dump_info (REPORT_DR_DETAILS))
2042 fprintf (vect_dump, "Detected single element interleaving ");
2043 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2044 fprintf (vect_dump, " step ");
2045 print_generic_expr (vect_dump, step, TDF_SLIM);
2047 return true;
2050 if (vect_print_dump_info (REPORT_DETAILS))
2052 fprintf (vect_dump, "not consecutive access ");
2053 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2056 if (bb_vinfo)
2058 /* Mark the statement as unvectorizable. */
2059 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2060 return true;
2063 return false;
2066 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2068 /* First stmt in the interleaving chain. Check the chain. */
2069 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2070 struct data_reference *data_ref = dr;
2071 unsigned int count = 1;
2072 tree next_step;
2073 tree prev_init = DR_INIT (data_ref);
2074 gimple prev = stmt;
2075 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2077 while (next)
2079 /* Skip same data-refs. In case that two or more stmts share
2080 data-ref (supported only for loads), we vectorize only the first
2081 stmt, and the rest get their vectorized loads from the first
2082 one. */
2083 if (!tree_int_cst_compare (DR_INIT (data_ref),
2084 DR_INIT (STMT_VINFO_DATA_REF (
2085 vinfo_for_stmt (next)))))
2087 if (DR_IS_WRITE (data_ref))
2089 if (vect_print_dump_info (REPORT_DETAILS))
2090 fprintf (vect_dump, "Two store stmts share the same dr.");
2091 return false;
2094 /* Check that there are no load-store dependences for these loads,
2095    to prevent a load-store-load sequence to the same location.  */
2096 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2097 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2099 if (vect_print_dump_info (REPORT_DETAILS))
2100 fprintf (vect_dump,
2101 "READ_WRITE dependence in interleaving.");
2102 return false;
2105 /* For loads, use the same data-ref load.  */
2106 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2108 prev = next;
2109 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2110 continue;
2112 prev = next;
2114 /* Check that all the accesses have the same STEP. */
2115 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2116 if (tree_int_cst_compare (step, next_step))
2118 if (vect_print_dump_info (REPORT_DETAILS))
2119 fprintf (vect_dump, "not consecutive access in interleaving");
2120 return false;
2123 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2124 /* Check that the distance between two accesses is equal to the type
2125 size. Otherwise, we have gaps. */
2126 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2127 - TREE_INT_CST_LOW (prev_init)) / type_size;
2128 if (diff != 1)
2130 /* FORNOW: SLP of accesses with gaps is not supported. */
2131 slp_impossible = true;
2132 if (DR_IS_WRITE (data_ref))
2134 if (vect_print_dump_info (REPORT_DETAILS))
2135 fprintf (vect_dump, "interleaved store with gaps");
2136 return false;
2139 gaps += diff - 1;
2142 /* Store the gap from the previous member of the group. If there is no
2143 gap in the access, DR_GROUP_GAP is always 1. */
2144 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2146 prev_init = DR_INIT (data_ref);
2147 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2148 /* Count the number of data-refs in the chain. */
2149 count++;
2152 /* COUNT is the number of accesses found, we multiply it by the size of
2153 the type to get COUNT_IN_BYTES. */
2154 count_in_bytes = type_size * count;
2156 /* Check that the size of the interleaving (including gaps) is not
2157 greater than STEP. */
2158 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2160 if (vect_print_dump_info (REPORT_DETAILS))
2162 fprintf (vect_dump, "interleaving size is greater than step for ");
2163 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2165 return false;
2168 /* Check that the size of the interleaving is equal to STEP for stores,
2169 i.e., that there are no gaps. */
2170 if (dr_step && dr_step != count_in_bytes)
2172 if (DR_IS_READ (dr))
2174 slp_impossible = true;
2175 /* There is a gap after the last load in the group. This gap is a
2176 difference between the stride and the number of elements. When
2177 there is no gap, this difference should be 0. */
2178 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2180 else
2182 if (vect_print_dump_info (REPORT_DETAILS))
2183 fprintf (vect_dump, "interleaved store with gaps");
2184 return false;
2188 /* Check that STEP is a multiple of type size. */
2189 if (dr_step && (dr_step % type_size) != 0)
2191 if (vect_print_dump_info (REPORT_DETAILS))
2193 fprintf (vect_dump, "step is not a multiple of type size: step ");
2194 print_generic_expr (vect_dump, step, TDF_SLIM);
2195 fprintf (vect_dump, " size ");
2196 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2197 TDF_SLIM);
2199 return false;
2202 /* FORNOW: we handle only interleaving that is a power of 2.
2203    We don't fail here if it may still be possible to vectorize the
2204 group using SLP. If not, the size of the group will be checked in
2205 vect_analyze_operations, and the vectorization will fail. */
2206 if (exact_log2 (stride) == -1)
2208 if (vect_print_dump_info (REPORT_DETAILS))
2209 fprintf (vect_dump, "interleaving is not a power of 2");
2211 if (slp_impossible)
2212 return false;
2215 if (stride == 0)
2216 stride = count;
2218 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2219 if (vect_print_dump_info (REPORT_DETAILS))
2220 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2222 /* SLP: create an SLP data structure for every interleaving group of
2223    stores for further analysis in vect_analyze_slp.  */
2224 if (DR_IS_WRITE (dr) && !slp_impossible)
2226 if (loop_vinfo)
2227 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2228 stmt);
2229 if (bb_vinfo)
2230 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2231 stmt);
2235 return true;
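/* Worked example (illustrative, values assumed): for a group of loads
   b[4*i] and b[4*i+2] on 4-byte ints, dr_step is 16, so stride = 4 and
   count = 2.  The second member gets DR_GROUP_GAP = 2 (its distance in
   elements from the previous member), and since dr_step (16) differs
   from count_in_bytes (8), the first stmt gets DR_GROUP_GAP =
   stride - count = 2, the gap after the last load.  SLP is marked
   impossible because of the gaps, but loop vectorization of the group
   can still proceed since the group size 4 is a power of two.  */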
2239 /* Analyze the access pattern of the data-reference DR.
2240 In case of non-consecutive accesses call vect_analyze_group_access() to
2241 analyze groups of strided accesses. */
2243 static bool
2244 vect_analyze_data_ref_access (struct data_reference *dr)
2246 tree step = DR_STEP (dr);
2247 tree scalar_type = TREE_TYPE (DR_REF (dr));
2248 gimple stmt = DR_STMT (dr);
2249 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2250 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2251 struct loop *loop = NULL;
2252 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2254 if (loop_vinfo)
2255 loop = LOOP_VINFO_LOOP (loop_vinfo);
2257 if (loop_vinfo && !step)
2259 if (vect_print_dump_info (REPORT_DETAILS))
2260 fprintf (vect_dump, "bad data-ref access in loop");
2261 return false;
2264 /* Don't allow invariant accesses in loops. */
2265 if (loop_vinfo && dr_step == 0)
2266 return false;
2268 if (loop && nested_in_vect_loop_p (loop, stmt))
2270 /* Interleaved accesses are not yet supported within outer-loop
2271 vectorization for references in the inner-loop. */
2272 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2274 /* For the rest of the analysis we use the outer-loop step. */
2275 step = STMT_VINFO_DR_STEP (stmt_info);
2276 dr_step = TREE_INT_CST_LOW (step);
2278 if (dr_step == 0)
2280 if (vect_print_dump_info (REPORT_ALIGNMENT))
2281 fprintf (vect_dump, "zero step in outer loop.");
2282 if (DR_IS_READ (dr))
2283 return true;
2284 else
2285 return false;
2289 /* Consecutive? */
2290 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2292 /* Mark that it is not interleaving. */
2293 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2294 return true;
2297 if (loop && nested_in_vect_loop_p (loop, stmt))
2299 if (vect_print_dump_info (REPORT_ALIGNMENT))
2300 fprintf (vect_dump, "strided access in outer loop.");
2301 return false;
2304 /* Not consecutive access - check if it's a part of interleaving group. */
2305 return vect_analyze_group_access (dr);
2309 /* Function vect_analyze_data_ref_accesses.
2311 Analyze the access pattern of all the data references in the loop.
2313 FORNOW: the only access pattern that is considered vectorizable is a
2314 simple step 1 (consecutive) access.
2316 FORNOW: handle only arrays and pointer accesses. */
2318 bool
2319 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2321 unsigned int i;
2322 VEC (data_reference_p, heap) *datarefs;
2323 struct data_reference *dr;
2325 if (vect_print_dump_info (REPORT_DETAILS))
2326 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2328 if (loop_vinfo)
2329 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2330 else
2331 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2333 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2334 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2335 && !vect_analyze_data_ref_access (dr))
2337 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2338 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2340 if (bb_vinfo)
2342 /* Mark the statement as not vectorizable. */
2343 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2344 continue;
2346 else
2347 return false;
2350 return true;
2353 /* Function vect_prune_runtime_alias_test_list.
2355 Prune a list of ddrs to be tested at run-time by versioning for alias.
2356    Return FALSE if the resulting list of ddrs is longer than allowed by
2357 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2359 bool
2360 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2362 VEC (ddr_p, heap) * ddrs =
2363 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2364 unsigned i, j;
2366 if (vect_print_dump_info (REPORT_DETAILS))
2367 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2369 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2371 bool found;
2372 ddr_p ddr_i;
2374 ddr_i = VEC_index (ddr_p, ddrs, i);
2375 found = false;
2377 for (j = 0; j < i; j++)
2379 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2381 if (vect_vfa_range_equal (ddr_i, ddr_j))
2383 if (vect_print_dump_info (REPORT_DR_DETAILS))
2385 fprintf (vect_dump, "found equal ranges ");
2386 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2387 fprintf (vect_dump, ", ");
2388 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2389 fprintf (vect_dump, " and ");
2390 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2391 fprintf (vect_dump, ", ");
2392 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2394 found = true;
2395 break;
2399 if (found)
2401 VEC_ordered_remove (ddr_p, ddrs, i);
2402 continue;
2404 i++;
2407 if (VEC_length (ddr_p, ddrs) >
2408 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2410 if (vect_print_dump_info (REPORT_DR_DETAILS))
2412 fprintf (vect_dump,
2413 "disable versioning for alias - max number of generated "
2414 "checks exceeded.");
2417 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2419 return false;
2422 return true;
2426 /* Function vect_analyze_data_refs.
2428 Find all the data references in the loop or basic block.
2430 The general structure of the analysis of data refs in the vectorizer is as
2431 follows:
2432 1- vect_analyze_data_refs(loop/bb): call
2433 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2434 in the loop/bb and their dependences.
2435 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2436 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2437 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2441 bool
2442 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2443 bb_vec_info bb_vinfo,
2444 int *min_vf)
2446 struct loop *loop = NULL;
2447 basic_block bb = NULL;
2448 unsigned int i;
2449 VEC (data_reference_p, heap) *datarefs;
2450 struct data_reference *dr;
2451 tree scalar_type;
2452 bool res;
2454 if (vect_print_dump_info (REPORT_DETAILS))
2455 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2457 if (loop_vinfo)
2459 loop = LOOP_VINFO_LOOP (loop_vinfo);
2460 res = compute_data_dependences_for_loop
2461 (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
2462 &LOOP_VINFO_DDRS (loop_vinfo));
2464 if (!res)
2466 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2467 fprintf (vect_dump, "not vectorized: loop contains function calls"
2468 " or data references that cannot be analyzed");
2469 return false;
2472 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2474 else
2476 bb = BB_VINFO_BB (bb_vinfo);
2477 res = compute_data_dependences_for_bb (bb, true,
2478 &BB_VINFO_DATAREFS (bb_vinfo),
2479 &BB_VINFO_DDRS (bb_vinfo));
2480 if (!res)
2482 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2483 fprintf (vect_dump, "not vectorized: basic block contains function"
2484 " calls or data references that cannot be analyzed");
2485 return false;
2488 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2491 /* Go through the data-refs, check that the analysis succeeded. Update
2492 pointer from stmt_vec_info struct to DR and vectype. */
2494 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2496 gimple stmt;
2497 stmt_vec_info stmt_info;
2498 tree base, offset, init;
2499 int vf;
2501 if (!dr || !DR_REF (dr))
2503 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2504 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2505 return false;
2508 stmt = DR_STMT (dr);
2509 stmt_info = vinfo_for_stmt (stmt);
2511 /* Check that analysis of the data-ref succeeded. */
2512 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2513 || !DR_STEP (dr))
2515 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2517 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2518 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2521 if (bb_vinfo)
2523 /* Mark the statement as not vectorizable. */
2524 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2525 continue;
2527 else
2528 return false;
2531 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2533 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2534 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2535 "constant");
2536 if (bb_vinfo)
2538 /* Mark the statement as not vectorizable. */
2539 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2540 continue;
2542 else
2543 return false;
2546 base = unshare_expr (DR_BASE_ADDRESS (dr));
2547 offset = unshare_expr (DR_OFFSET (dr));
2548 init = unshare_expr (DR_INIT (dr));
2550 if (stmt_could_throw_p (stmt))
2552 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2554 fprintf (vect_dump, "not vectorized: statement can throw an "
2555 "exception ");
2556 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2558 return false;
2561 /* Update DR field in stmt_vec_info struct. */
2563 /* If the dataref is in an inner-loop of the loop that is considered
2564    for vectorization, we also want to analyze the access relative to
2565 the outer-loop (DR contains information only relative to the
2566 inner-most enclosing loop). We do that by building a reference to the
2567 first location accessed by the inner-loop, and analyze it relative to
2568 the outer-loop. */
2569 if (loop && nested_in_vect_loop_p (loop, stmt))
2571 tree outer_step, outer_base, outer_init;
2572 HOST_WIDE_INT pbitsize, pbitpos;
2573 tree poffset;
2574 enum machine_mode pmode;
2575 int punsignedp, pvolatilep;
2576 affine_iv base_iv, offset_iv;
2577 tree dinit;
2579 /* Build a reference to the first location accessed by the
2580 inner-loop: *(BASE+INIT). (The first location is actually
2581 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2582 tree inner_base = build_fold_indirect_ref
2583 (fold_build2 (POINTER_PLUS_EXPR,
2584 TREE_TYPE (base), base,
2585 fold_convert (sizetype, init)));
2587 if (vect_print_dump_info (REPORT_DETAILS))
2589 fprintf (vect_dump, "analyze in outer-loop: ");
2590 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2593 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2594 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2595 gcc_assert (outer_base != NULL_TREE);
2597 if (pbitpos % BITS_PER_UNIT != 0)
2599 if (vect_print_dump_info (REPORT_DETAILS))
2600 fprintf (vect_dump, "failed: bit offset alignment.\n");
2601 return false;
2604 outer_base = build_fold_addr_expr (outer_base);
2605 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2606 &base_iv, false))
2608 if (vect_print_dump_info (REPORT_DETAILS))
2609 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2610 return false;
2613 if (offset)
2615 if (poffset)
2616 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2617 poffset);
2618 else
2619 poffset = offset;
2622 if (!poffset)
2624 offset_iv.base = ssize_int (0);
2625 offset_iv.step = ssize_int (0);
2627 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2628 &offset_iv, false))
2630 if (vect_print_dump_info (REPORT_DETAILS))
2631 fprintf (vect_dump, "evolution of offset is not affine.\n");
2632 return false;
2635 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2636 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2637 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2638 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2639 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2641 outer_step = size_binop (PLUS_EXPR,
2642 fold_convert (ssizetype, base_iv.step),
2643 fold_convert (ssizetype, offset_iv.step));
2645 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2646 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2647 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2648 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2649 STMT_VINFO_DR_OFFSET (stmt_info) =
2650 fold_convert (ssizetype, offset_iv.base);
2651 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2652 size_int (highest_pow2_factor (offset_iv.base));
2654 if (vect_print_dump_info (REPORT_DETAILS))
2656 fprintf (vect_dump, "\touter base_address: ");
2657 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2658 fprintf (vect_dump, "\n\touter offset from base address: ");
2659 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2660 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2661 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2662 fprintf (vect_dump, "\n\touter step: ");
2663 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2664 fprintf (vect_dump, "\n\touter aligned to: ");
2665 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2669 if (STMT_VINFO_DATA_REF (stmt_info))
2671 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2673 fprintf (vect_dump,
2674 "not vectorized: more than one data ref in stmt: ");
2675 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2677 return false;
2680 STMT_VINFO_DATA_REF (stmt_info) = dr;
2682 /* Set vectype for STMT. */
2683 scalar_type = TREE_TYPE (DR_REF (dr));
2684 STMT_VINFO_VECTYPE (stmt_info) =
2685 get_vectype_for_scalar_type (scalar_type);
2686 if (!STMT_VINFO_VECTYPE (stmt_info))
2688 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2690 fprintf (vect_dump,
2691 "not vectorized: no vectype for stmt: ");
2692 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2693 fprintf (vect_dump, " scalar_type: ");
2694 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2697 if (bb_vinfo)
2699 /* Mark the statement as not vectorizable. */
2700 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2701 continue;
2703 else
2704 return false;
2707 /* Adjust the minimal vectorization factor according to the
2708 vector type. */
2709 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2710 if (vf > *min_vf)
2711 *min_vf = vf;
2714 return true;
2718 /* Function vect_get_new_vect_var.
2720    Returns a name for a new variable.  The current naming scheme prepends
2721    the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
2722    VAR_KIND) to vectorizer generated variable names, appending NAME to the
2723    prefix if provided.  */
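/* For example (illustrative): vect_get_new_vect_var (ptr_type,
   vect_pointer_var, "in") creates a pointer variable whose name starts
   with "vect_pin", while a NULL NAME gives a variable named just
   "vect_p".  */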
2725 tree
2726 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2728 const char *prefix;
2729 tree new_vect_var;
2731 switch (var_kind)
2733 case vect_simple_var:
2734 prefix = "vect_";
2735 break;
2736 case vect_scalar_var:
2737 prefix = "stmp_";
2738 break;
2739 case vect_pointer_var:
2740 prefix = "vect_p";
2741 break;
2742 default:
2743 gcc_unreachable ();
2746 if (name)
2748 char* tmp = concat (prefix, name, NULL);
2749 new_vect_var = create_tmp_var (type, tmp);
2750 free (tmp);
2752 else
2753 new_vect_var = create_tmp_var (type, prefix);
2755 /* Mark vector typed variable as a gimple register variable. */
2756 if (TREE_CODE (type) == VECTOR_TYPE)
2757 DECL_GIMPLE_REG_P (new_vect_var) = true;
2759 return new_vect_var;
2763 /* Function vect_create_addr_base_for_vector_ref.
2765 Create an expression that computes the address of the first memory location
2766 that will be accessed for a data reference.
2768 Input:
2769 STMT: The statement containing the data reference.
2770 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2771    OFFSET: Optional.  If supplied, it is added to the initial address.
2772 LOOP: Specify relative to which loop-nest should the address be computed.
2773 For example, when the dataref is in an inner-loop nested in an
2774 outer-loop that is now being vectorized, LOOP can be either the
2775 outer-loop, or the inner-loop. The first memory location accessed
2776 by the following dataref ('in' points to short):
2778 for (i=0; i<N; i++)
2779 for (j=0; j<M; j++)
2780 s += in[i+j]
2782 is as follows:
2783 if LOOP=i_loop: &in (relative to i_loop)
2784 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2786 Output:
2787 1. Return an SSA_NAME whose value is the address of the memory location of
2788 the first vector of the data reference.
2789 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2790 these statement(s) which define the returned SSA_NAME.
2792 FORNOW: We are only handling array accesses with step 1. */
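/* Illustrative sketch (assumed, simplified) of what is emitted in the
   preheader for the j_loop case above, where the first address is
   &in + i*2B:

     base_off.0 = (sizetype) (i * 2) + 0;      DR_OFFSET + DR_INIT
     vect_pin.1 = &in + base_off.0;            returned address

   The temporary names here are hypothetical; the actual statements are
   built with force_gimple_operand and handed back through
   NEW_STMT_LIST.  */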
2794 tree
2795 vect_create_addr_base_for_vector_ref (gimple stmt,
2796 gimple_seq *new_stmt_list,
2797 tree offset,
2798 struct loop *loop)
2800 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2801 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2802 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2803 tree base_name;
2804 tree data_ref_base_var;
2805 tree vec_stmt;
2806 tree addr_base, addr_expr;
2807 tree dest;
2808 gimple_seq seq = NULL;
2809 tree base_offset = unshare_expr (DR_OFFSET (dr));
2810 tree init = unshare_expr (DR_INIT (dr));
2811 tree vect_ptr_type;
2812 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2813 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2814 tree base;
2816 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2818 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2820 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2822 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2823 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2824 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2827 if (loop_vinfo)
2828 base_name = build_fold_indirect_ref (data_ref_base);
2829 else
2831 base_offset = ssize_int (0);
2832 init = ssize_int (0);
2833 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2836 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2837 add_referenced_var (data_ref_base_var);
2838 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2839 data_ref_base_var);
2840 gimple_seq_add_seq (new_stmt_list, seq);
2842 /* Create base_offset */
2843 base_offset = size_binop (PLUS_EXPR,
2844 fold_convert (sizetype, base_offset),
2845 fold_convert (sizetype, init));
2846 dest = create_tmp_var (sizetype, "base_off");
2847 add_referenced_var (dest);
2848 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2849 gimple_seq_add_seq (new_stmt_list, seq);
2851 if (offset)
2853 tree tmp = create_tmp_var (sizetype, "offset");
2855 add_referenced_var (tmp);
2856 offset = fold_build2 (MULT_EXPR, sizetype,
2857 fold_convert (sizetype, offset), step);
2858 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2859 base_offset, offset);
2860 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2861 gimple_seq_add_seq (new_stmt_list, seq);
2864 /* base + base_offset */
2865 if (loop_vinfo)
2866 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2867 data_ref_base, base_offset);
2868 else
2870 addr_base = build1 (ADDR_EXPR,
2871 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2872 unshare_expr (DR_REF (dr)));
2875 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2876 base = get_base_address (DR_REF (dr));
2877 if (base
2878 && TREE_CODE (base) == MEM_REF)
2879 vect_ptr_type
2880 = build_qualified_type (vect_ptr_type,
2881 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2883 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2884 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2885 get_name (base_name));
2886 add_referenced_var (addr_expr);
2887 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2888 gimple_seq_add_seq (new_stmt_list, seq);
2890 if (DR_PTR_INFO (dr)
2891 && TREE_CODE (vec_stmt) == SSA_NAME)
2892 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2894 if (vect_print_dump_info (REPORT_DETAILS))
2896 fprintf (vect_dump, "created ");
2897 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2900 return vec_stmt;
2904 /* Function vect_create_data_ref_ptr.
2906 Create a new pointer to vector type (vp), that points to the first location
2907 accessed in the loop by STMT, along with the def-use update chain to
2908 appropriately advance the pointer through the loop iterations. Also set
2909 aliasing information for the pointer. This vector pointer is used by the
2910 callers to this function to create a memory reference expression for vector
2911 load/store access.
2913 Input:
2914 1. STMT: a stmt that references memory. Expected to be of the form
2915 GIMPLE_ASSIGN <name, data-ref> or
2916 GIMPLE_ASSIGN <data-ref, name>.
2917 2. AT_LOOP: the loop where the vector memref is to be created.
2918 3. OFFSET (optional): an offset to be added to the initial address accessed
2919 by the data-ref in STMT.
2920 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2921 pointing to the initial address.
2922 5. TYPE: if not NULL indicates the required type of the data-ref.
2924 Output:
2925 1. Declare a new ptr to vector_type, and have it point to the base of the
2926    data reference (the initial address accessed by the data reference).
2927 For example, for vector of type V8HI, the following code is generated:
2929 v8hi *vp;
2930 vp = (v8hi *)initial_address;
2932 if OFFSET is not supplied:
2933 initial_address = &a[init];
2934 if OFFSET is supplied:
2935 initial_address = &a[init + OFFSET];
2937 Return the initial_address in INITIAL_ADDRESS.
2939 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2940 update the pointer in each iteration of the loop.
2942 Return the increment stmt that updates the pointer in PTR_INCR.
2944 3. Set INV_P to true if the access pattern of the data reference in the
2945 vectorized loop is invariant. Set it to false otherwise.
2947 4. Return the pointer. */
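/* Illustrative sketch (assumed, simplified): for a consecutive V4SI load
   from an array 'a', the generated code is roughly

     vect_pa.0 = (vector(4) int *) initial_address;     preheader
   LOOP:
     vect_pa.1 = PHI <vect_pa.0, vect_pa.2>
     ... = *vect_pa.1;
     vect_pa.2 = vect_pa.1 + 16;     step = TYPE_SIZE_UNIT (V4SI)

   with *INITIAL_ADDRESS set to the preheader address, *PTR_INCR to the
   increment statement, and *INV_P to false because DR_STEP is not
   zero.  The temporary names are hypothetical.  */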
2949 tree
2950 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2951 tree offset, tree *initial_address, gimple *ptr_incr,
2952 bool only_init, bool *inv_p)
2954 tree base_name;
2955 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2956 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2957 struct loop *loop = NULL;
2958 bool nested_in_vect_loop = false;
2959 struct loop *containing_loop = NULL;
2960 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2961 tree vect_ptr_type;
2962 tree vect_ptr;
2963 tree new_temp;
2964 gimple vec_stmt;
2965 gimple_seq new_stmt_list = NULL;
2966 edge pe = NULL;
2967 basic_block new_bb;
2968 tree vect_ptr_init;
2969 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2970 tree vptr;
2971 gimple_stmt_iterator incr_gsi;
2972 bool insert_after;
2973 tree indx_before_incr, indx_after_incr;
2974 gimple incr;
2975 tree step;
2976 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2977 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2978 tree base;
2980 if (loop_vinfo)
2982 loop = LOOP_VINFO_LOOP (loop_vinfo);
2983 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2984 containing_loop = (gimple_bb (stmt))->loop_father;
2985 pe = loop_preheader_edge (loop);
2987 else
2989 gcc_assert (bb_vinfo);
2990 only_init = true;
2991 *ptr_incr = NULL;
2994 /* Check the step (evolution) of the load in LOOP, and record
2995 whether it's invariant. */
2996 if (nested_in_vect_loop)
2997 step = STMT_VINFO_DR_STEP (stmt_info);
2998 else
2999 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3001 if (tree_int_cst_compare (step, size_zero_node) == 0)
3002 *inv_p = true;
3003 else
3004 *inv_p = false;
3006 /* Create an expression for the first address accessed by this load
3007 in LOOP. */
3008 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3010 if (vect_print_dump_info (REPORT_DETAILS))
3012 tree data_ref_base = base_name;
3013 fprintf (vect_dump, "create vector-pointer variable to type: ");
3014 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3015 if (TREE_CODE (data_ref_base) == VAR_DECL
3016 || TREE_CODE (data_ref_base) == ARRAY_REF)
3017 fprintf (vect_dump, " vectorizing an array ref: ");
3018 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3019 fprintf (vect_dump, " vectorizing a record based array ref: ");
3020 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3021 fprintf (vect_dump, " vectorizing a pointer ref: ");
3022 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3025 /* (1) Create the new vector-pointer variable. */
3026 vect_ptr_type = build_pointer_type (vectype);
3027 base = get_base_address (DR_REF (dr));
3028 if (base
3029 && TREE_CODE (base) == MEM_REF)
3030 vect_ptr_type
3031 = build_qualified_type (vect_ptr_type,
3032 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3033 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3034 get_name (base_name));
3036 /* Vector types inherit the alias set of their component type by default so
3037 we need to use a ref-all pointer if the data reference does not conflict
3038 with the created vector data reference because it is not addressable. */
3039 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3040 get_alias_set (DR_REF (dr))))
3042 vect_ptr_type
3043 = build_pointer_type_for_mode (vectype,
3044 TYPE_MODE (vect_ptr_type), true);
3045 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3046 get_name (base_name));
3049 /* Likewise for any of the data references in the stmt group. */
3050 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
3052 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
3055 tree lhs = gimple_assign_lhs (orig_stmt);
3056 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3057 get_alias_set (lhs)))
3059 vect_ptr_type
3060 = build_pointer_type_for_mode (vectype,
3061 TYPE_MODE (vect_ptr_type), true);
3062 vect_ptr
3063 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3064 get_name (base_name));
3065 break;
3068 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
3070 while (orig_stmt);
3073 add_referenced_var (vect_ptr);
3075 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3076 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3077 def-use update cycles for the pointer: one relative to the outer-loop
3078 (LOOP), which is what steps (3) and (4) below do. The other is relative
3079 to the inner-loop (which is the inner-most loop containing the dataref),
3080    and this is done by step (5) below.
3082 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3083 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3084 redundant. Steps (3),(4) create the following:
3086 vp0 = &base_addr;
3087 LOOP: vp1 = phi(vp0,vp2)
3090 vp2 = vp1 + step
3091 goto LOOP
3093 If there is an inner-loop nested in loop, then step (5) will also be
3094 applied, and an additional update in the inner-loop will be created:
3096 vp0 = &base_addr;
3097 LOOP: vp1 = phi(vp0,vp2)
3099 inner: vp3 = phi(vp1,vp4)
3100 vp4 = vp3 + inner_step
3101 if () goto inner
3103 vp2 = vp1 + step
3104 if () goto LOOP */
3106 /* (2) Calculate the initial address of the vector-pointer, and set
3107 the vector-pointer to point to it before the loop. */
3109 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3111 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3112 offset, loop);
3113 if (new_stmt_list)
3115 if (pe)
3117 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3118 gcc_assert (!new_bb);
3120 else
3121 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
3124 *initial_address = new_temp;
3126 /* Create: p = (vectype *) initial_base */
3127 if (TREE_CODE (new_temp) != SSA_NAME
3128 || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
3130 vec_stmt = gimple_build_assign (vect_ptr,
3131 fold_convert (vect_ptr_type, new_temp));
3132 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
3133 /* Copy the points-to information if it exists. */
3134 if (DR_PTR_INFO (dr))
3135 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
3136 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
3137 if (pe)
3139 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3140 gcc_assert (!new_bb);
3142 else
3143 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
3145 else
3146 vect_ptr_init = new_temp;
3148 /* (3) Handle the updating of the vector-pointer inside the loop.
3149 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3150 inner-loop nested in LOOP (during outer-loop vectorization). */
3152 /* No update in loop is required. */
3153 if (only_init && (!loop_vinfo || at_loop == loop))
3154 vptr = vect_ptr_init;
3155 else
3157 /* The step of the vector pointer is the Vector Size. */
3158 tree step = TYPE_SIZE_UNIT (vectype);
3159 /* One exception to the above is when the scalar step of the load in
3160 LOOP is zero. In this case the step here is also zero. */
3161 if (*inv_p)
3162 step = size_zero_node;
3164 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3166 create_iv (vect_ptr_init,
3167 fold_convert (vect_ptr_type, step),
3168 vect_ptr, loop, &incr_gsi, insert_after,
3169 &indx_before_incr, &indx_after_incr);
3170 incr = gsi_stmt (incr_gsi);
3171 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3173 /* Copy the points-to information if it exists. */
3174 if (DR_PTR_INFO (dr))
3176 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3177 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3179 if (ptr_incr)
3180 *ptr_incr = incr;
3182 vptr = indx_before_incr;
3185 if (!nested_in_vect_loop || only_init)
3186 return vptr;
3189 /* (4) Handle the updating of the vector-pointer inside the inner-loop
3190    nested in LOOP, if it exists.  */
3192 gcc_assert (nested_in_vect_loop);
3193 if (!only_init)
3195 standard_iv_increment_position (containing_loop, &incr_gsi,
3196 &insert_after);
3197 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
3198 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3199 &indx_after_incr);
3200 incr = gsi_stmt (incr_gsi);
3201 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3203 /* Copy the points-to information if it exists. */
3204 if (DR_PTR_INFO (dr))
3206 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3207 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3209 if (ptr_incr)
3210 *ptr_incr = incr;
3212 return indx_before_incr;
3214 else
3215 gcc_unreachable ();
3219 /* Function bump_vector_ptr
3221 Increment a pointer (to a vector type) by vector-size. If requested,
3222 i.e. if PTR-INCR is given, then also connect the new increment stmt
3223 to the existing def-use update-chain of the pointer, by modifying
3224 the PTR_INCR as illustrated below:
3226 The pointer def-use update-chain before this function:
3227 DATAREF_PTR = phi (p_0, p_2)
3228 ....
3229 PTR_INCR: p_2 = DATAREF_PTR + step
3231 The pointer def-use update-chain after this function:
3232 DATAREF_PTR = phi (p_0, p_2)
3233 ....
3234 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3235 ....
3236 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3238 Input:
3239 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3240 in the loop.
3241 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3242 the loop. The increment amount across iterations is expected
3243 to be vector_size.
3244 BSI - location where the new update stmt is to be placed.
3245 STMT - the original scalar memory-access stmt that is being vectorized.
3246 BUMP - optional. The offset by which to bump the pointer. If not given,
3247 the offset is assumed to be vector_size.
3249 Output: Return NEW_DATAREF_PTR as illustrated above.
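/* For example (illustrative): with V4SI and no explicit BUMP, the new
   statement is NEW_DATAREF_PTR = DATAREF_PTR + 16, and the use of
   DATAREF_PTR inside PTR_INCR is redirected to NEW_DATAREF_PTR, so the
   cross-iteration increment keeps advancing by the original step.  */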
3253 tree
3254 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3255 gimple stmt, tree bump)
3257 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3258 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3259 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3260 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3261 tree update = TYPE_SIZE_UNIT (vectype);
3262 gimple incr_stmt;
3263 ssa_op_iter iter;
3264 use_operand_p use_p;
3265 tree new_dataref_ptr;
3267 if (bump)
3268 update = bump;
3270 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3271 dataref_ptr, update);
3272 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3273 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3274 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3276 /* Copy the points-to information if it exists. */
3277 if (DR_PTR_INFO (dr))
3278 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3280 if (!ptr_incr)
3281 return new_dataref_ptr;
3283 /* Update the vector-pointer's cross-iteration increment. */
3284 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3286 tree use = USE_FROM_PTR (use_p);
3288 if (use == dataref_ptr)
3289 SET_USE (use_p, new_dataref_ptr);
3290 else
3291 gcc_assert (tree_int_cst_compare (use, update) == 0);
3294 return new_dataref_ptr;
3298 /* Function vect_create_destination_var.
3300 Create a new temporary of type VECTYPE. */
3302 tree
3303 vect_create_destination_var (tree scalar_dest, tree vectype)
3305 tree vec_dest;
3306 const char *new_name;
3307 tree type;
3308 enum vect_var_kind kind;
3310 kind = vectype ? vect_simple_var : vect_scalar_var;
3311 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3313 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3315 new_name = get_name (scalar_dest);
3316 if (!new_name)
3317 new_name = "var_";
3318 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3319 add_referenced_var (vec_dest);
3321 return vec_dest;
3324 /* Function vect_strided_store_supported.
3326    Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3327 and FALSE otherwise. */
3329 bool
3330 vect_strided_store_supported (tree vectype)
3332 optab interleave_high_optab, interleave_low_optab;
3333 enum machine_mode mode;
3335 mode = TYPE_MODE (vectype);
3337 /* Check that the operation is supported. */
3338 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3339 vectype, optab_default);
3340 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3341 vectype, optab_default);
3342 if (!interleave_high_optab || !interleave_low_optab)
3344 if (vect_print_dump_info (REPORT_DETAILS))
3345 fprintf (vect_dump, "no optab for interleave.");
3346 return false;
3349 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3350 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3352 if (vect_print_dump_info (REPORT_DETAILS))
3353 fprintf (vect_dump, "interleave op not supported by target.");
3354 return false;
3357 return true;
3361 /* Function vect_permute_store_chain.
3363 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3364 a power of 2, generate interleave_high/low stmts to reorder the data
3365 correctly for the stores. Return the final references for stores in
3366 RESULT_CHAIN.
3368 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3369 The input is 4 vectors each containing 8 elements. We assign a number to
3370 each element, the input sequence is:
3372 1st vec: 0 1 2 3 4 5 6 7
3373 2nd vec: 8 9 10 11 12 13 14 15
3374 3rd vec: 16 17 18 19 20 21 22 23
3375 4th vec: 24 25 26 27 28 29 30 31
3377 The output sequence should be:
3379 1st vec: 0 8 16 24 1 9 17 25
3380 2nd vec: 2 10 18 26 3 11 19 27
3381    3rd vec: 4 12 20 28 5 13 21 29
3382 4th vec: 6 14 22 30 7 15 23 31
3384 i.e., we interleave the contents of the four vectors in their order.
3386 We use interleave_high/low instructions to create such output. The input of
3387 each interleave_high/low operation is two vectors:
3388 1st vec 2nd vec
3389 0 1 2 3 4 5 6 7
3390 the even elements of the result vector are obtained left-to-right from the
3391 high/low elements of the first vector. The odd elements of the result are
3392 obtained left-to-right from the high/low elements of the second vector.
3393 The output of interleave_high will be: 0 4 1 5
3394 and of interleave_low: 2 6 3 7
3397 The permutation is done in log LENGTH stages. In each stage interleave_high
3398 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3399 where the first argument is taken from the first half of DR_CHAIN and the
3400    second argument from its second half.
3401 In our example,
3403 I1: interleave_high (1st vec, 3rd vec)
3404 I2: interleave_low (1st vec, 3rd vec)
3405 I3: interleave_high (2nd vec, 4th vec)
3406 I4: interleave_low (2nd vec, 4th vec)
3408 The output for the first stage is:
3410 I1: 0 16 1 17 2 18 3 19
3411 I2: 4 20 5 21 6 22 7 23
3412 I3: 8 24 9 25 10 26 11 27
3413 I4: 12 28 13 29 14 30 15 31
3415 The output of the second stage, i.e. the final result is:
3417 I1: 0 8 16 24 1 9 17 25
3418 I2: 2 10 18 26 3 11 19 27
3419    I3: 4 12 20 28 5 13 21 29
3420 I4: 6 14 22 30 7 15 23 31. */
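/* Illustrative stand-alone scalar model (editorial sketch, not part of the
   vectorizer) of the log2(LENGTH) interleaving stages described above, for
   LENGTH == 4 vectors of 8 elements.  "High" is taken to mean the first
   half of each input vector, matching the numbering in the example;
   endianness selection is ignored for clarity.  */

#include <stdio.h>
#include <string.h>

#define NELT 8

static void
interleave_high (const int *a, const int *b, int *out)
{
  int k;
  for (k = 0; k < NELT / 2; k++)        /* First halves, element-wise.  */
    {
      out[2 * k] = a[k];
      out[2 * k + 1] = b[k];
    }
}

static void
interleave_low (const int *a, const int *b, int *out)
{
  int k;
  for (k = 0; k < NELT / 2; k++)        /* Second halves, element-wise.  */
    {
      out[2 * k] = a[NELT / 2 + k];
      out[2 * k + 1] = b[NELT / 2 + k];
    }
}

int
main (void)
{
  int chain[4][NELT], result[4][NELT];
  int v, e, stage, j;

  for (v = 0; v < 4; v++)
    for (e = 0; e < NELT; e++)
      chain[v][e] = v * NELT + e;       /* 0 .. 31, as in the comment.  */

  for (stage = 0; stage < 2; stage++)   /* log2 (4) == 2 stages.  */
    {
      for (j = 0; j < 4 / 2; j++)       /* LENGTH / 2 pairs per stage.  */
        {
          interleave_high (chain[j], chain[j + 2], result[2 * j]);
          interleave_low (chain[j], chain[j + 2], result[2 * j + 1]);
        }
      memcpy (chain, result, sizeof (chain));
    }

  for (v = 0; v < 4; v++)               /* Prints 0 8 16 24 1 9 17 25, etc.  */
    {
      for (e = 0; e < NELT; e++)
        printf ("%d ", chain[v][e]);
      printf ("\n");
    }
  return 0;
}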
3422 bool
3423 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3424 unsigned int length,
3425 gimple stmt,
3426 gimple_stmt_iterator *gsi,
3427 VEC(tree,heap) **result_chain)
3429 tree perm_dest, vect1, vect2, high, low;
3430 gimple perm_stmt;
3431 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3432 int i;
3433 unsigned int j;
3434 enum tree_code high_code, low_code;
3436 /* Check that the operation is supported. */
3437 if (!vect_strided_store_supported (vectype))
3438 return false;
3440 *result_chain = VEC_copy (tree, heap, dr_chain);
3442 for (i = 0; i < exact_log2 (length); i++)
3444 for (j = 0; j < length/2; j++)
3446 vect1 = VEC_index (tree, dr_chain, j);
3447 vect2 = VEC_index (tree, dr_chain, j+length/2);
3449 /* Create interleaving stmt:
3450 in the case of big endian:
3451 high = interleave_high (vect1, vect2)
3452 and in the case of little endian:
3453 high = interleave_low (vect1, vect2). */
3454 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3455 DECL_GIMPLE_REG_P (perm_dest) = 1;
3456 add_referenced_var (perm_dest);
3457 if (BYTES_BIG_ENDIAN)
3459 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3460 low_code = VEC_INTERLEAVE_LOW_EXPR;
3462 else
3464 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3465 high_code = VEC_INTERLEAVE_LOW_EXPR;
3467 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3468 vect1, vect2);
3469 high = make_ssa_name (perm_dest, perm_stmt);
3470 gimple_assign_set_lhs (perm_stmt, high);
3471 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3472 VEC_replace (tree, *result_chain, 2*j, high);
3474 /* Create interleaving stmt:
3475 in the case of big endian:
3476 low = interleave_low (vect1, vect2)
3477 and in the case of little endian:
3478 low = interleave_high (vect1, vect2). */
3479 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3480 DECL_GIMPLE_REG_P (perm_dest) = 1;
3481 add_referenced_var (perm_dest);
3482 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3483 vect1, vect2);
3484 low = make_ssa_name (perm_dest, perm_stmt);
3485 gimple_assign_set_lhs (perm_stmt, low);
3486 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3487 VEC_replace (tree, *result_chain, 2*j+1, low);
3489 dr_chain = VEC_copy (tree, heap, *result_chain);
3491 return true;
3494 /* Function vect_setup_realignment
3496 This function is called when vectorizing an unaligned load using
3497 the dr_explicit_realign[_optimized] scheme.
3498 This function generates the following code at the loop prolog:
3500 p = initial_addr;
3501 x msq_init = *(floor(p)); # prolog load
3502 realignment_token = call target_builtin;
3503 loop:
3504 x msq = phi (msq_init, ---)
3506 The stmts marked with x are generated only for the case of
3507 dr_explicit_realign_optimized.
3509 The code above sets up a new (vector) pointer, pointing to the first
3510 location accessed by STMT, and a "floor-aligned" load using that pointer.
3511 It also generates code to compute the "realignment-token" (if the relevant
3512 target hook was defined), and creates a phi-node at the loop-header bb
3513 whose arguments are the result of the prolog-load (created by this
3514 function) and the result of a load that takes place in the loop (to be
3515 created by the caller to this function).
3517 For the case of dr_explicit_realign_optimized:
3518 The caller to this function uses the phi-result (msq) to create the
3519 realignment code inside the loop, and sets up the missing phi argument,
3520 as follows:
3521 loop:
3522 msq = phi (msq_init, lsq)
3523 lsq = *(floor(p')); # load in loop
3524 result = realign_load (msq, lsq, realignment_token);
3526 For the case of dr_explicit_realign:
3527 loop:
3528 msq = *(floor(p)); # load in loop
3529 p' = p + (VS-1);
3530 lsq = *(floor(p')); # load in loop
3531 result = realign_load (msq, lsq, realignment_token);
3533 Input:
3534 STMT - (scalar) load stmt to be vectorized. This load accesses
3535 a memory location that may be unaligned.
3536 BSI - place where new code is to be inserted.
3537 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3538 is used.
3540 Output:
3541 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3542 target hook, if defined.
3543 Return value - the result of the loop-header phi node. */
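/* Illustrative numeric example (assumed): with 16-byte vectors and a
   pointer p that is 4 bytes past a 16-byte boundary,

     msq = *(floor(p));          covers bytes [p-4,  p+12)
     lsq = *(floor(p + 15));     covers bytes [p+12, p+28)
     result = realign_load (msq, lsq, realignment_token);

   The two aligned loads straddle the 16 unaligned bytes starting at p,
   and REALIGN_LOAD extracts exactly those bytes, steered by the
   realignment token computed from the misalignment of p.  */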
3545 tree
3546 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3547 tree *realignment_token,
3548 enum dr_alignment_support alignment_support_scheme,
3549 tree init_addr,
3550 struct loop **at_loop)
3552 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3553 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3554 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3555 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3556 struct loop *loop = NULL;
3557 edge pe = NULL;
3558 tree scalar_dest = gimple_assign_lhs (stmt);
3559 tree vec_dest;
3560 gimple inc;
3561 tree ptr;
3562 tree data_ref;
3563 gimple new_stmt;
3564 basic_block new_bb;
3565 tree msq_init = NULL_TREE;
3566 tree new_temp;
3567 gimple phi_stmt;
3568 tree msq = NULL_TREE;
3569 gimple_seq stmts = NULL;
3570 bool inv_p;
3571 bool compute_in_loop = false;
3572 bool nested_in_vect_loop = false;
3573 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3574 struct loop *loop_for_initial_load = NULL;
3576 if (loop_vinfo)
3578 loop = LOOP_VINFO_LOOP (loop_vinfo);
3579 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3582 gcc_assert (alignment_support_scheme == dr_explicit_realign
3583 || alignment_support_scheme == dr_explicit_realign_optimized);
3585 /* We need to generate three things:
3586 1. the misalignment computation
3587 2. the extra vector load (for the optimized realignment scheme).
3588 3. the phi node for the two vectors from which the realignment is
3589 done (for the optimized realignment scheme). */
3591 /* 1. Determine where to generate the misalignment computation.
3593 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3594 calculation will be generated by this function, outside the loop (in the
3595 preheader). Otherwise, INIT_ADDR has already been computed for us by the
3596 caller, inside the loop.
3598 Background: If the misalignment remains fixed throughout the iterations of
3599 the loop, then both realignment schemes are applicable, and also the
3600 misalignment computation can be done outside LOOP. This is because we are
3601 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3602 are a multiple of VS (the Vector Size), and therefore the misalignment in
3603 different vectorized LOOP iterations is always the same.
3604 The problem arises only if the memory access is in an inner-loop nested
3605 inside LOOP, which is now being vectorized using outer-loop vectorization.
3606 This is the only case when the misalignment of the memory access may not
3607 remain fixed throughout the iterations of the inner-loop (as explained in
3608 detail in vect_supportable_dr_alignment). In this case, not only is the
3609 optimized realignment scheme not applicable, but also the misalignment
3610 computation (and generation of the realignment token that is passed to
3611 REALIGN_LOAD) have to be done inside the loop.
3613 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3614 or not, which in turn determines if the misalignment is computed inside
3615 the inner-loop, or outside LOOP. */
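/* Note that !LOOP_VINFO means basic-block vectorization: there is no
   containing loop and hence no preheader, so everything is emitted at
   *GSI and only the non-optimized realignment scheme can be used.  */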
3617 if (init_addr != NULL_TREE || !loop_vinfo)
3619 compute_in_loop = true;
3620 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3624 /* 2. Determine where to generate the extra vector load.
3626 For the optimized realignment scheme, instead of generating two vector
3627 loads in each iteration, we generate a single extra vector load in the
3628 preheader of the loop, and in each iteration reuse the result of the
3629 vector load from the previous iteration. In case the memory access is in
3630 an inner-loop nested inside LOOP, which is now being vectorized using
3631 outer-loop vectorization, we need to determine whether this initial vector
3632 load should be generated at the preheader of the inner-loop, or can be
3633 generated at the preheader of LOOP. If the memory access has no evolution
3634 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3635 to be generated inside LOOP (in the preheader of the inner-loop). */
3637 if (nested_in_vect_loop)
3639 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3640 bool invariant_in_outerloop =
3641 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3642 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3644 else
3645 loop_for_initial_load = loop;
3646 if (at_loop)
3647 *at_loop = loop_for_initial_load;
3649 if (loop_for_initial_load)
3650 pe = loop_preheader_edge (loop_for_initial_load);
3652 /* 3. For the case of the optimized realignment, create the first vector
3653 load at the loop preheader. */
3655 if (alignment_support_scheme == dr_explicit_realign_optimized)
3657 /* Create msq_init = *(floor(p1)) in the loop preheader */
3659 gcc_assert (!compute_in_loop);
3660 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3661 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3662 &init_addr, &inc, true, &inv_p);
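/* Round PTR down to the alignment of the vector type:
   floor (p) = p & -TYPE_ALIGN_UNIT (vectype).  E.g., assuming a
   16-byte vector type, this ANDs the address with -16, clearing its
   four low bits.  */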
3663 new_stmt = gimple_build_assign_with_ops
3664 (BIT_AND_EXPR, NULL_TREE, ptr,
3665 build_int_cst (TREE_TYPE (ptr),
3666 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3667 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3668 gimple_assign_set_lhs (new_stmt, new_temp);
3669 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3670 gcc_assert (!new_bb);
3671 data_ref
3672 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3673 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3674 new_stmt = gimple_build_assign (vec_dest, data_ref);
3675 new_temp = make_ssa_name (vec_dest, new_stmt);
3676 gimple_assign_set_lhs (new_stmt, new_temp);
3677 mark_symbols_for_renaming (new_stmt);
3678 if (pe)
3680 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3681 gcc_assert (!new_bb);
3683 else
3684 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3686 msq_init = gimple_assign_lhs (new_stmt);
3689 /* 4. Create realignment token using a target builtin, if available.
3690 It is done either inside the containing loop, or before LOOP (as
3691 determined above). */
3693 if (targetm.vectorize.builtin_mask_for_load)
3695 tree builtin_decl;
3697 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3698 if (!init_addr)
3700 /* Generate the INIT_ADDR computation outside LOOP. */
3701 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3702 NULL_TREE, loop);
3703 if (loop)
3705 pe = loop_preheader_edge (loop);
3706 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3707 gcc_assert (!new_bb);
3709 else
3710 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
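/* The mask-for-load builtin derives the realignment token (a
   permutation mask) from the low bits of INIT_ADDR; the caller passes
   it as the third operand of REALIGN_LOAD.  */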
3713 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3714 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3715 vec_dest =
3716 vect_create_destination_var (scalar_dest,
3717 gimple_call_return_type (new_stmt));
3718 new_temp = make_ssa_name (vec_dest, new_stmt);
3719 gimple_call_set_lhs (new_stmt, new_temp);
3721 if (compute_in_loop)
3722 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3723 else
3725 /* Generate the misalignment computation outside LOOP. */
3726 pe = loop_preheader_edge (loop);
3727 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3728 gcc_assert (!new_bb);
3731 *realignment_token = gimple_call_lhs (new_stmt);
3733 /* The result of the CALL_EXPR to this builtin is determined from
3734 the value of the parameter and no global variables are touched,
3735 which makes the builtin a "const" function. Requiring the
3736 builtin to have the "const" attribute makes it unnecessary
3737 to call mark_call_clobbered. */
3738 gcc_assert (TREE_READONLY (builtin_decl));
3741 if (alignment_support_scheme == dr_explicit_realign)
3742 return msq;
3744 gcc_assert (!compute_in_loop);
3745 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3748 /* 5. Create msq = phi <msq_init, lsq> in loop */
3750 pe = loop_preheader_edge (containing_loop);
3751 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3752 msq = make_ssa_name (vec_dest, NULL);
3753 phi_stmt = create_phi_node (msq, containing_loop->header);
3754 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3755 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3757 return msq;
3761 /* Function vect_strided_load_supported.
3763 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3764 and FALSE otherwise. */
3766 bool
3767 vect_strided_load_supported (tree vectype)
3769 optab perm_even_optab, perm_odd_optab;
3770 enum machine_mode mode;
3772 mode = TYPE_MODE (vectype);
3774 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3775 optab_default);
3776 if (!perm_even_optab)
3778 if (vect_print_dump_info (REPORT_DETAILS))
3779 fprintf (vect_dump, "no optab for perm_even.");
3780 return false;
3783 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3785 if (vect_print_dump_info (REPORT_DETAILS))
3786 fprintf (vect_dump, "perm_even op not supported by target.");
3787 return false;
3790 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3791 optab_default);
3792 if (!perm_odd_optab)
3794 if (vect_print_dump_info (REPORT_DETAILS))
3795 fprintf (vect_dump, "no optab for perm_odd.");
3796 return false;
3799 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3801 if (vect_print_dump_info (REPORT_DETAILS))
3802 fprintf (vect_dump, "perm_odd op not supported by target.");
3803 return false;
3805 return true;
3809 /* Function vect_permute_load_chain.
3811 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3812 a power of 2, generate extract_even/odd stmts to reorder the input data
3813 correctly. Return the final references for loads in RESULT_CHAIN.
3815 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3816 The input is 4 vectors each containing 8 elements. We assign a number to each
3817 element; the input sequence is:
3819 1st vec: 0 1 2 3 4 5 6 7
3820 2nd vec: 8 9 10 11 12 13 14 15
3821 3rd vec: 16 17 18 19 20 21 22 23
3822 4th vec: 24 25 26 27 28 29 30 31
3824 The output sequence should be:
3826 1st vec: 0 4 8 12 16 20 24 28
3827 2nd vec: 1 5 9 13 17 21 25 29
3828 3rd vec: 2 6 10 14 18 22 26 30
3829 4th vec: 3 7 11 15 19 23 27 31
3831 i.e., the first output vector should contain the first elements of each
3832 interleaving group, etc.
3834 We use extract_even/odd instructions to create such output. The input of
3835 each extract_even/odd operation is two vectors
3836 1st vec    2nd vec
3837 0 1 2 3    4 5 6 7
3839 and the output is the vector of extracted even/odd elements. The output of
3840 extract_even will be: 0 2 4 6
3841 and of extract_odd: 1 3 5 7
3844 The permutation is done in log2 (LENGTH) stages. In each stage extract_even
3845 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3846 their order. In our example,
3848 E1: extract_even (1st vec, 2nd vec)
3849 E2: extract_odd (1st vec, 2nd vec)
3850 E3: extract_even (3rd vec, 4th vec)
3851 E4: extract_odd (3rd vec, 4th vec)
3853 The output for the first stage will be:
3855 E1: 0 2 4 6 8 10 12 14
3856 E2: 1 3 5 7 9 11 13 15
3857 E3: 16 18 20 22 24 26 28 30
3858 E4: 17 19 21 23 25 27 29 31
3860 In order to proceed and create the correct sequence for the next stage (or
3861 for the correct output, if the second stage is the last one, as in our
3862 example), we first put the output of extract_even operation and then the
3863 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3864 The input for the second stage is:
3866 1st vec (E1): 0 2 4 6 8 10 12 14
3867 2nd vec (E3): 16 18 20 22 24 26 28 30
3868 3rd vec (E2): 1 3 5 7 9 11 13 15
3869 4th vec (E4): 17 19 21 23 25 27 29 31
3871 The output of the second stage:
3873 E1: 0 4 8 12 16 20 24 28
3874 E2: 2 6 10 14 18 22 26 30
3875 E3: 1 5 9 13 17 21 25 29
3876 E4: 3 7 11 15 19 23 27 31
3878 And RESULT_CHAIN after reordering:
3880 1st vec (E1): 0 4 8 12 16 20 24 28
3881 2nd vec (E3): 1 5 9 13 17 21 25 29
3882 3rd vec (E2): 2 6 10 14 18 22 26 30
3883 4th vec (E4): 3 7 11 15 19 23 27 31. */
3885 bool
3886 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3887 unsigned int length,
3888 gimple stmt,
3889 gimple_stmt_iterator *gsi,
3890 VEC(tree,heap) **result_chain)
3892 tree perm_dest, data_ref, first_vect, second_vect;
3893 gimple perm_stmt;
3894 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3895 int i;
3896 unsigned int j;
3898 /* Check that the operation is supported. */
3899 if (!vect_strided_load_supported (vectype))
3900 return false;
3902 *result_chain = VEC_copy (tree, heap, dr_chain);
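/* log2 (LENGTH) permutation stages; after each stage the intermediate
   results collected in RESULT_CHAIN become the DR_CHAIN input of the
   next stage, as in the example above.  */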
3903 for (i = 0; i < exact_log2 (length); i++)
3905 for (j = 0; j < length; j +=2)
3907 first_vect = VEC_index (tree, dr_chain, j);
3908 second_vect = VEC_index (tree, dr_chain, j+1);
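/* E.g., in the first stage with LENGTH == 4 this pairs the 1st/2nd and
   3rd/4th vectors; the even results go to slots 0 and 1 of RESULT_CHAIN
   and the odd results to slots 2 and 3, giving the E1,E3,E2,E4 order
   described above.  */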
3910 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3911 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3912 DECL_GIMPLE_REG_P (perm_dest) = 1;
3913 add_referenced_var (perm_dest);
3915 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3916 perm_dest, first_vect,
3917 second_vect);
3919 data_ref = make_ssa_name (perm_dest, perm_stmt);
3920 gimple_assign_set_lhs (perm_stmt, data_ref);
3921 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3922 mark_symbols_for_renaming (perm_stmt);
3924 VEC_replace (tree, *result_chain, j/2, data_ref);
3926 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3927 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3928 DECL_GIMPLE_REG_P (perm_dest) = 1;
3929 add_referenced_var (perm_dest);
3931 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3932 perm_dest, first_vect,
3933 second_vect);
3934 data_ref = make_ssa_name (perm_dest, perm_stmt);
3935 gimple_assign_set_lhs (perm_stmt, data_ref);
3936 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3937 mark_symbols_for_renaming (perm_stmt);
3939 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3941 dr_chain = VEC_copy (tree, heap, *result_chain);
3943 return true;
3947 /* Function vect_transform_strided_load.
3949 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3950 to perform their permutation and record the resulting vectorized statements
3951 in the corresponding scalar statements. */
3954 bool
3955 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3956 gimple_stmt_iterator *gsi)
3958 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3959 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3960 gimple next_stmt, new_stmt;
3961 VEC(tree,heap) *result_chain = NULL;
3962 unsigned int i, gap_count;
3963 tree tmp_data_ref;
3965 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3966 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3967 vectors, that are ready for vector computation. */
3968 result_chain = VEC_alloc (tree, heap, size);
3969 /* Permute. */
3970 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3971 return false;
3973 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3974 Since we scan the chain starting from its first node, their order
3975 corresponds to the order of data-refs in RESULT_CHAIN. */
3976 next_stmt = first_stmt;
3977 gap_count = 1;
3978 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
3980 if (!next_stmt)
3981 break;
3983 /* Skip the gaps. Loads created for the gaps will be removed by dead
3984 code elimination pass later. No need to check for the first stmt in
3985 the group, since it always exists.
3986 DR_GROUP_GAP is the number of steps in elements from the previous
3987 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3988 correspond to the gaps. */
3989 if (next_stmt != first_stmt
3990 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3992 gap_count++;
3993 continue;
3996 while (next_stmt)
3998 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3999 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4000 copies, and we put the new vector statement in the first available
4001 RELATED_STMT. */
4002 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4003 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4004 else
4006 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4008 gimple prev_stmt =
4009 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4010 gimple rel_stmt =
4011 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
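/* Walk to the last statement on the RELATED_STMT chain and append
   NEW_STMT there.  */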
4012 while (rel_stmt)
4014 prev_stmt = rel_stmt;
4015 rel_stmt =
4016 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4019 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4020 new_stmt;
4024 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4025 gap_count = 1;
4026 /* If NEXT_STMT accesses the same DR as the previous statement,
4027 put the same TMP_DATA_REF as its vectorized statement; otherwise
4028 get the next data-ref from RESULT_CHAIN. */
4029 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4030 break;
4034 VEC_free (tree, heap, result_chain);
4035 return true;
4038 /* Function vect_can_force_dr_alignment_p.
4040 Returns whether the alignment of DECL can be forced to be aligned
4041 on an ALIGNMENT-bit boundary. */
4043 bool
4044 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4046 if (TREE_CODE (decl) != VAR_DECL)
4047 return false;
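/* The definition of an external decl lives in another translation
   unit, so its alignment cannot be raised here.  */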
4049 if (DECL_EXTERNAL (decl))
4050 return false;
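/* Once the decl has been written out to the assembly file it is too
   late to change its alignment.  */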
4052 if (TREE_ASM_WRITTEN (decl))
4053 return false;
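/* Statically allocated decls are limited by the maximum alignment the
   object file format supports; automatic decls by the maximum
   supported stack alignment.  */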
4055 if (TREE_STATIC (decl))
4056 return (alignment <= MAX_OFILE_ALIGNMENT);
4057 else
4058 return (alignment <= MAX_STACK_ALIGNMENT);
4062 /* Return whether the data reference DR is supported with respect to its
4063 alignment.
4064 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4065 if it is aligned, i.e., check if it is possible to vectorize it with different
4066 alignment. */
4068 enum dr_alignment_support
4069 vect_supportable_dr_alignment (struct data_reference *dr,
4070 bool check_aligned_accesses)
4072 gimple stmt = DR_STMT (dr);
4073 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4074 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4075 enum machine_mode mode = TYPE_MODE (vectype);
4076 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4077 struct loop *vect_loop = NULL;
4078 bool nested_in_vect_loop = false;
4080 if (aligned_access_p (dr) && !check_aligned_accesses)
4081 return dr_aligned;
4083 if (loop_vinfo)
4085 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4086 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4089 /* Possibly unaligned access. */
4091 /* We can choose between using the implicit realignment scheme (generating
4092 a misaligned_move stmt) and the explicit realignment scheme (generating
4093 aligned loads with a REALIGN_LOAD). There are two variants to the
4094 explicit realignment scheme: optimized, and unoptimized.
4095 We can optimize the realignment only if the step between consecutive
4096 vector loads is equal to the vector size. Since the vector memory
4097 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4098 is guaranteed that the misalignment amount remains the same throughout the
4099 execution of the vectorized loop. Therefore, we can create the
4100 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4101 at the loop preheader.
4103 However, in the case of outer-loop vectorization, when vectorizing a
4104 memory access in the inner-loop nested within the LOOP that is now being
4105 vectorized, while it is guaranteed that the misalignment of the
4106 vectorized memory access will remain the same in different outer-loop
4107 iterations, it is *not* guaranteed that it will remain the same throughout
4108 the execution of the inner-loop. This is because the inner-loop advances
4109 with the original scalar step (and not in steps of VS). If the inner-loop
4110 step happens to be a multiple of VS, then the misalignment remains fixed
4111 and we can use the optimized realignment scheme. For example:
4113 for (i=0; i<N; i++)
4114 for (j=0; j<M; j++)
4115 s += a[i+j];
4117 When vectorizing the i-loop in the above example, the step between
4118 consecutive vector loads is 1, and so the misalignment does not remain
4119 fixed across the execution of the inner-loop, and the realignment cannot
4120 be optimized (as illustrated in the following pseudo vectorized loop):
4122 for (i=0; i<N; i+=4)
4123 for (j=0; j<M; j++){
4124 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4125 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4126 // (assuming that we start from an aligned address).
4129 We therefore have to use the unoptimized realignment scheme:
4131 for (i=0; i<N; i+=4)
4132 for (j=k; j<M; j+=4)
4133 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4134 // that the misalignment of the initial address is
4135 // 0).
4137 The loop can then be vectorized as follows:
4139 for (k=0; k<4; k++){
4140 rt = get_realignment_token (&vp[k]);
4141 for (i=0; i<N; i+=4){
4142 v1 = vp[i+k];
4143 for (j=k; j<M; j+=4){
4144 v2 = vp[i+j+VS-1];
4145 va = REALIGN_LOAD <v1,v2,rt>;
4146 vs += va;
4147 v1 = v2;
4150 } */
4152 if (DR_IS_READ (dr))
4154 bool is_packed = false;
4155 tree type = (TREE_TYPE (DR_REF (dr)));
4157 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4158 && (!targetm.vectorize.builtin_mask_for_load
4159 || targetm.vectorize.builtin_mask_for_load ()))
4161 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
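/* The optimized scheme requires the misalignment to stay fixed across
   iterations: in an inner loop vectorized via the outer loop that holds
   only when the inner-loop step equals the vector size, and basic-block
   vectorization has no preheader to host the prolog load, so fall back
   to the basic scheme otherwise.  */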
4162 if ((nested_in_vect_loop
4163 && (TREE_INT_CST_LOW (DR_STEP (dr))
4164 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4165 || !loop_vinfo)
4166 return dr_explicit_realign;
4167 else
4168 return dr_explicit_realign_optimized;
4170 if (!known_alignment_for_access_p (dr))
4172 tree ba = DR_BASE_OBJECT (dr);
4174 if (ba)
4175 is_packed = contains_packed_reference (ba);
4178 if (targetm.vectorize.
4179 support_vector_misalignment (mode, type,
4180 DR_MISALIGNMENT (dr), is_packed))
4181 /* Can't software pipeline the loads, but can at least do them. */
4182 return dr_unaligned_supported;
4184 else
4186 bool is_packed = false;
4187 tree type = (TREE_TYPE (DR_REF (dr)));
4189 if (!known_alignment_for_access_p (dr))
4191 tree ba = DR_BASE_OBJECT (dr);
4193 if (ba)
4194 is_packed = contains_packed_reference (ba);
4197 if (targetm.vectorize.
4198 support_vector_misalignment (mode, type,
4199 DR_MISALIGNMENT (dr), is_packed))
4200 return dr_unaligned_supported;
4203 /* Unsupported. */
4204 return dr_unaligned_unsupported;