gcc/tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "dumpfile.h"
35 #include "cfgloop.h"
36 #include "tree-chrec.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-vectorizer.h"
39 #include "diagnostic-core.h"
41 /* Need to include rtl.h, expr.h, etc. for optabs. */
42 #include "expr.h"
43 #include "optabs.h"
45 /* Return true if load- or store-lanes optab OPTAB is implemented for
46 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
48 static bool
49 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
50 tree vectype, unsigned HOST_WIDE_INT count)
52 enum machine_mode mode, array_mode;
53 bool limit_p;
55 mode = TYPE_MODE (vectype);
56 limit_p = !targetm.array_mode_supported_p (mode, count);
57 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
58 MODE_INT, limit_p);
60 if (array_mode == BLKmode)
62 if (vect_print_dump_info (REPORT_DETAILS))
63 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
64 GET_MODE_NAME (mode), count);
65 return false;
68 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
70 if (vect_print_dump_info (REPORT_DETAILS))
71 fprintf (vect_dump, "cannot use %s<%s><%s>",
72 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
73 return false;
76 if (vect_print_dump_info (REPORT_DETAILS))
77 fprintf (vect_dump, "can use %s<%s><%s>",
78 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
80 return true;
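/* For illustration (a hypothetical example, not taken from the source):
   assuming a 128-bit V4SImode VECTYPE and COUNT == 2, the array mode must
   hold 2 * 128 = 256 bits, so mode_for_size looks for a 256-bit integer
   mode (OImode on targets that provide one).  The check then succeeds only
   if the target implements the corresponding load/store-lanes pattern,
   i.e. convert_optab_handler (optab, OImode, V4SImode) is not
   CODE_FOR_nothing.  */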
84 /* Return the smallest scalar part of STMT.
85 This is used to determine the vectype of the stmt. We generally set the
86 vectype according to the type of the result (lhs). For stmts whose
87 result-type is different than the type of the arguments (e.g., demotion,
88 promotion), vectype will be reset appropriately (later). Note that we have
89 to visit the smallest datatype in this function, because that determines the
90 VF. If the smallest datatype in the loop is present only as the rhs of a
91 promotion operation - we'd miss it.
92 Such a case, where a variable of this datatype does not appear in the lhs
93 anywhere in the loop, can only occur if it's an invariant: e.g.:
94 'int_x = (int) short_inv', which we'd expect to have been optimized away by
95 invariant motion. However, we cannot rely on invariant motion to always
96 take invariants out of the loop, and so in the case of promotion we also
97 have to check the rhs.
98 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
99 types. */
101 tree
102 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
103 HOST_WIDE_INT *rhs_size_unit)
105 tree scalar_type = gimple_expr_type (stmt);
106 HOST_WIDE_INT lhs, rhs;
108 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
110 if (is_gimple_assign (stmt)
111 && (gimple_assign_cast_p (stmt)
112 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
113 || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
114 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
116 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
118 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
119 if (rhs < lhs)
120 scalar_type = rhs_type;
123 *lhs_size_unit = lhs;
124 *rhs_size_unit = rhs;
125 return scalar_type;
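/* For illustration (a hypothetical example): for the widening assignment
   'int_x = (int) short_y', the lhs unit size is 4 and the rhs unit size is
   2, so the function returns the short type.  Assuming a 128-bit vector
   size, the vectorization factor is then computed from 8 shorts per vector
   rather than 4 ints.  */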
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
132 int
133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
135 gimple next_stmt = first_stmt;
136 int result = 0;
138 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
139 return -1;
141 while (next_stmt && next_stmt != stmt)
143 result++;
144 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
147 if (next_stmt)
148 return result;
149 else
150 return -1;
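/* For illustration (a hypothetical example): if FIRST_STMT heads an
   interleaving chain of stores to a[0], a[1], a[2], a[3] (in chain order),
   the store to a[2] gets position 2; a statement whose GROUP_FIRST_ELEMENT
   is a different chain head yields -1.  */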
154 /* Function vect_insert_into_interleaving_chain.
156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
158 static void
159 vect_insert_into_interleaving_chain (struct data_reference *dra,
160 struct data_reference *drb)
162 gimple prev, next;
163 tree next_init;
164 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
167 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
169 while (next)
171 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
174 /* Insert here. */
175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
177 return;
179 prev = next;
180 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
183 /* We got to the end of the list. Insert here. */
184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
189 /* Function vect_update_interleaving_chain.
191 For two data-refs DRA and DRB that are part of a chain of interleaved data
192 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
194 There are four possible cases:
195 1. New stmts - both DRA and DRB are not a part of any chain:
196 FIRST_DR = DRB
197 NEXT_DR (DRB) = DRA
198 2. DRB is a part of a chain and DRA is not:
199 no need to update FIRST_DR
200 no need to insert DRB
201 insert DRA according to init
202 3. DRA is a part of a chain and DRB is not:
203 if (init of FIRST_DR > init of DRB)
204 FIRST_DR = DRB
205 NEXT(FIRST_DR) = previous FIRST_DR
206 else
207 insert DRB according to its init
208 4. both DRA and DRB are in some interleaving chains:
209 choose the chain with the smallest init of FIRST_DR
210 insert the nodes of the second chain into the first one. */
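/* For illustration of case 4 (a hypothetical example): suppose DRA's chain
   covers inits {4, 12} and DRB's chain covers inits {0, 8}.  Since DRB's
   chain starts at the smaller init, DRA's nodes are inserted into DRB's
   chain in increasing init order, giving {0, 4, 8, 12}, and every moved
   node's GROUP_FIRST_ELEMENT is updated to DRB's chain head.  */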
212 static void
213 vect_update_interleaving_chain (struct data_reference *drb,
214 struct data_reference *dra)
216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218 tree next_init, init_dra_chain, init_drb_chain;
219 gimple first_a, first_b;
220 tree node_init;
221 gimple node, prev, next, first_stmt;
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
229 return;
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra, drb);
238 return;
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
246 old_first_stmt)));
247 gimple tmp;
249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp = old_first_stmt;
259 while (tmp)
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
265 else
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb, dra);
269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
271 return;
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277 if (first_a == first_b)
278 return;
279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
284 /* Insert the nodes of DRA chain into the DRB chain.
285 After inserting a node, continue from this node of the DRB chain (don't
286 start from the beginning). */
287 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289 first_stmt = first_b;
291 else
293 /* Insert the nodes of DRB chain into the DRA chain.
294 After inserting a node, continue from this node of the DRA chain (don't
295 start from the beginning). */
296 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298 first_stmt = first_a;
301 while (node)
303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
305 while (next)
307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 if (tree_int_cst_compare (next_init, node_init) > 0)
310 /* Insert here. */
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
313 prev = node;
314 break;
316 prev = next;
317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
319 if (!next)
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
324 prev = node;
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
331 /* Check dependence between DRA and DRB for basic block vectorization.
332 If the accesses share the same bases and offsets, we can compare their initial
333 constant offsets to decide whether they differ or not. In case of a read-
334 write dependence we check that the load is before the store to ensure that
335 vectorization will not change the order of the accesses. */
337 static bool
338 vect_drs_dependent_in_basic_block (struct data_reference *dra,
339 struct data_reference *drb)
341 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
342 gimple earlier_stmt;
344 /* We only call this function for pairs of loads and stores, but we verify
345 it here. */
346 if (DR_IS_READ (dra) == DR_IS_READ (drb))
348 if (DR_IS_READ (dra))
349 return false;
350 else
351 return true;
354 /* Check that the data-refs have same bases and offsets. If not, we can't
355 determine if they are dependent. */
356 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357 || !dr_equal_offsets_p (dra, drb))
358 return true;
360 /* Check the types. */
361 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
364 if (type_size_a != type_size_b
365 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366 TREE_TYPE (DR_REF (drb))))
367 return true;
369 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
372 /* Two different locations - no dependence. */
373 if (init_a != init_b)
374 return false;
376 /* We have a read-write dependence. Check that the load is before the store.
377 When we vectorize basic blocks, vector load can be only before
378 corresponding scalar load, and vector store can be only after its
379 corresponding scalar store. So the order of the accesses is preserved in
380 case the load is before the store. */
381 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
383 return false;
385 return true;
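/* For illustration (a hypothetical example): in the basic block
   'x = a[0]; a[0] = y;' both refs have the same base, offset and init, so
   this is a read-write pair; the earlier statement is the load, so the
   order is preserved and the function returns false.  With the store first
   ('a[0] = y; x = a[0];') it returns true, signalling a dependence that
   blocks vectorization of the block.  */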
389 /* Function vect_check_interleaving.
391 Check if DRA and DRB are a part of interleaving. In case they are, insert
392 DRA and DRB in an interleaving chain. */
394 static bool
395 vect_check_interleaving (struct data_reference *dra,
396 struct data_reference *drb)
398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
400 /* Check that the data-refs have same first location (except init) and they
401 are both either store or load (not load and store). */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403 || !dr_equal_offsets_p (dra, drb)
404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405 || DR_IS_READ (dra) != DR_IS_READ (drb))
406 return false;
408 /* Check:
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between
412 data-refs' inits. */
413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
416 if (type_size_a != type_size_b
417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419 TREE_TYPE (DR_REF (drb))))
420 return false;
422 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424 step = TREE_INT_CST_LOW (DR_STEP (dra));
426 if (init_a > init_b)
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size = (init_a - init_b) % type_size_a;
432 if (step && (init_a - init_b) > step)
433 return false;
435 if (diff_mod_size == 0)
437 vect_update_interleaving_chain (drb, dra);
438 if (vect_print_dump_info (REPORT_DR_DETAILS))
440 fprintf (vect_dump, "Detected interleaving ");
441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 fprintf (vect_dump, " and ");
443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
445 return true;
448 else
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size = (init_b - init_a) % type_size_a;
454 if (step && (init_b - init_a) > step)
455 return false;
457 if (diff_mod_size == 0)
459 vect_update_interleaving_chain (dra, drb);
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 fprintf (vect_dump, "Detected interleaving ");
463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
467 return true;
471 return false;
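/* For illustration (a hypothetical example): for two stores a[2*i] and
   a[2*i+1] with 4-byte elements, the bases and offsets match, both steps
   are 8, and the inits are 0 and 4.  Here init_b > init_a, the difference
   4 does not exceed the step, and 4 % 4 == 0, so an interleaving is
   detected with DRA accessed before DRB and the chain is updated
   accordingly.  */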
474 /* Check if the data references pointed to by DR_I and DR_J are the same or
475 belong to the same interleaving group. Return FALSE if drs are
476 different, otherwise return TRUE. */
478 static bool
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
481 gimple stmt_i = DR_STMT (dr_i);
482 gimple stmt_j = DR_STMT (dr_j);
484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
489 return true;
490 else
491 return false;
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
497 static bool
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
504 return true;
505 else
506 return false;
509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
510 tested at run-time. Return TRUE if DDR was successfully inserted.
511 Return false if versioning is not supported. */
513 static bool
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
519 return false;
521 if (vect_print_dump_info (REPORT_DR_DETAILS))
523 fprintf (vect_dump, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525 fprintf (vect_dump, " and ");
526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
529 if (optimize_loop_nest_for_size_p (loop))
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning not supported when optimizing for size.");
533 return false;
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
537 if (loop->inner)
539 if (vect_print_dump_info (REPORT_DR_DETAILS))
540 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
541 return false;
544 /* FORNOW: We don't support creating runtime alias tests for non-constant
545 step. */
546 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
547 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
549 if (vect_print_dump_info (REPORT_DR_DETAILS))
550 fprintf (vect_dump, "versioning not yet supported for non-constant "
551 "step");
552 return false;
555 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
556 return true;
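/* Note (informal sketch, not the exact code generated later): marking a DDR
   here only records it; the loop-versioning code conceptually emits a
   pairwise overlap test of the two address ranges, roughly
       a_start + segment_len_a <= b_start || b_start + segment_len_b <= a_start,
   and keeps an unvectorized copy of the loop for the case where the ranges
   may overlap.  */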
560 /* Function vect_analyze_data_ref_dependence.
562 Return TRUE if there (might) exist a dependence between a memory-reference
563 DRA and a memory-reference DRB. If versioning for alias can check the
564 dependence at run-time, return FALSE. Adjust *MAX_VF according to
565 the data dependence. */
567 static bool
568 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
569 loop_vec_info loop_vinfo, int *max_vf)
571 unsigned int i;
572 struct loop *loop = NULL;
573 struct data_reference *dra = DDR_A (ddr);
574 struct data_reference *drb = DDR_B (ddr);
575 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
576 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
577 lambda_vector dist_v;
578 unsigned int loop_depth;
580 /* Don't bother to analyze statements marked as unvectorizable. */
581 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
582 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
583 return false;
585 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
587 /* Independent data accesses. */
588 vect_check_interleaving (dra, drb);
589 return false;
592 if (loop_vinfo)
593 loop = LOOP_VINFO_LOOP (loop_vinfo);
595 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
596 return false;
598 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
600 gimple earlier_stmt;
602 if (loop_vinfo)
604 if (vect_print_dump_info (REPORT_DR_DETAILS))
606 fprintf (vect_dump, "versioning for alias required: "
607 "can't determine dependence between ");
608 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
609 fprintf (vect_dump, " and ");
610 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
613 /* Add to list of ddrs that need to be tested at run-time. */
614 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
617 /* When vectorizing a basic block, an unknown dependence can still mean a
618 grouped access. */
619 if (vect_check_interleaving (dra, drb))
620 return false;
622 /* Read-read is OK (we need this check here, after checking for
623 interleaving). */
624 if (DR_IS_READ (dra) && DR_IS_READ (drb))
625 return false;
627 if (vect_print_dump_info (REPORT_DR_DETAILS))
629 fprintf (vect_dump, "can't determine dependence between ");
630 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
631 fprintf (vect_dump, " and ");
632 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
635 /* We do not vectorize basic blocks with write-write dependencies. */
636 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
637 return true;
639 /* Check that it's not a load-after-store dependence. */
640 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
641 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
642 return true;
644 return false;
647 /* Versioning for alias is not yet supported for basic block SLP, and
648 dependence distance is not applicable; hence, in case of known data
649 dependence, basic block vectorization is impossible for now. */
650 if (!loop_vinfo)
652 if (dra != drb && vect_check_interleaving (dra, drb))
653 return false;
655 if (vect_print_dump_info (REPORT_DR_DETAILS))
657 fprintf (vect_dump, "determined dependence between ");
658 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
659 fprintf (vect_dump, " and ");
660 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
663 /* Do not vectorize basic blocks with write-write dependences. */
664 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
665 return true;
667 /* Check if this dependence is allowed in basic block vectorization. */
668 return vect_drs_dependent_in_basic_block (dra, drb);
671 /* Loop-based vectorization and known data dependence. */
672 if (DDR_NUM_DIST_VECTS (ddr) == 0)
674 if (vect_print_dump_info (REPORT_DR_DETAILS))
676 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
677 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
678 fprintf (vect_dump, " and ");
679 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
681 /* Add to list of ddrs that need to be tested at run-time. */
682 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
685 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
686 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
688 int dist = dist_v[loop_depth];
690 if (vect_print_dump_info (REPORT_DR_DETAILS))
691 fprintf (vect_dump, "dependence distance = %d.", dist);
693 if (dist == 0)
695 if (vect_print_dump_info (REPORT_DR_DETAILS))
697 fprintf (vect_dump, "dependence distance == 0 between ");
698 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
699 fprintf (vect_dump, " and ");
700 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
703 /* For interleaving, mark that there is a read-write dependency if
704 necessary. We have already checked that one of the data-refs is a store. */
705 if (DR_IS_READ (dra))
706 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
707 else
709 if (DR_IS_READ (drb))
710 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
713 continue;
716 if (dist > 0 && DDR_REVERSED_P (ddr))
718 /* If DDR_REVERSED_P the order of the data-refs in DDR was
719 reversed (to make distance vector positive), and the actual
720 distance is negative. */
721 if (vect_print_dump_info (REPORT_DR_DETAILS))
722 fprintf (vect_dump, "dependence distance negative.");
723 continue;
726 if (abs (dist) >= 2
727 && abs (dist) < *max_vf)
729 /* The dependence distance requires reduction of the maximal
730 vectorization factor. */
731 *max_vf = abs (dist);
732 if (vect_print_dump_info (REPORT_DR_DETAILS))
733 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
734 *max_vf);
737 if (abs (dist) >= *max_vf)
739 /* Dependence distance does not create dependence, as far as
740 vectorization is concerned, in this case. */
741 if (vect_print_dump_info (REPORT_DR_DETAILS))
742 fprintf (vect_dump, "dependence distance >= VF.");
743 continue;
746 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
748 fprintf (vect_dump, "not vectorized, possible dependence "
749 "between data-refs ");
750 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
751 fprintf (vect_dump, " and ");
752 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
755 return true;
758 return false;
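/* For illustration (a hypothetical example): for
   'for (i = 0; i < n; i++) a[i + 3] = a[i] + 1;' the dependence distance
   is 3, so *max_vf is reduced to 3 and a vectorization factor of 4 or more
   is later rejected, while VF 2 remains possible.  A distance of 0 (the
   same location read and written in one iteration) does not restrict the
   vectorization factor.  */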
761 /* Function vect_analyze_data_ref_dependences.
763 Examine all the data references in the loop, and make sure there do not
764 exist any data dependences between them. Set *MAX_VF according to
765 the maximum vectorization factor the data dependences allow. */
767 bool
768 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
769 bb_vec_info bb_vinfo, int *max_vf)
771 unsigned int i;
772 VEC (ddr_p, heap) *ddrs = NULL;
773 struct data_dependence_relation *ddr;
775 if (vect_print_dump_info (REPORT_DETAILS))
776 fprintf (vect_dump, "=== vect_analyze_dependences ===");
778 if (loop_vinfo)
779 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
780 else
781 ddrs = BB_VINFO_DDRS (bb_vinfo);
783 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
784 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
785 return false;
787 return true;
791 /* Function vect_compute_data_ref_alignment
793 Compute the misalignment of the data reference DR.
795 Output:
796 1. If during the misalignment computation it is found that the data reference
797 cannot be vectorized then false is returned.
798 2. DR_MISALIGNMENT (DR) is defined.
800 FOR NOW: No analysis is actually performed. Misalignment is calculated
801 only for trivial cases. TODO. */
803 static bool
804 vect_compute_data_ref_alignment (struct data_reference *dr)
806 gimple stmt = DR_STMT (dr);
807 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
808 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
809 struct loop *loop = NULL;
810 tree ref = DR_REF (dr);
811 tree vectype;
812 tree base, base_addr;
813 bool base_aligned;
814 tree misalign;
815 tree aligned_to, alignment;
817 if (vect_print_dump_info (REPORT_DETAILS))
818 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
820 if (loop_vinfo)
821 loop = LOOP_VINFO_LOOP (loop_vinfo);
823 /* Initialize misalignment to unknown. */
824 SET_DR_MISALIGNMENT (dr, -1);
826 /* Strided loads perform only component accesses, misalignment information
827 is irrelevant for them. */
828 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
829 return true;
831 misalign = DR_INIT (dr);
832 aligned_to = DR_ALIGNED_TO (dr);
833 base_addr = DR_BASE_ADDRESS (dr);
834 vectype = STMT_VINFO_VECTYPE (stmt_info);
836 /* In case the dataref is in an inner-loop of the loop that is being
837 vectorized (LOOP), we use the base and misalignment information
838 relative to the outer-loop (LOOP). This is ok only if the misalignment
839 stays the same throughout the execution of the inner-loop, which is why
840 we have to check that the stride of the dataref in the inner-loop is
841 evenly divisible by the vector size. */
842 if (loop && nested_in_vect_loop_p (loop, stmt))
844 tree step = DR_STEP (dr);
845 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
847 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
849 if (vect_print_dump_info (REPORT_ALIGNMENT))
850 fprintf (vect_dump, "inner step divides the vector-size.");
851 misalign = STMT_VINFO_DR_INIT (stmt_info);
852 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
853 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
855 else
857 if (vect_print_dump_info (REPORT_ALIGNMENT))
858 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
859 misalign = NULL_TREE;
863 /* Similarly, if we're doing basic-block vectorization, we can only use
864 base and misalignment information relative to an innermost loop if the
865 misalignment stays the same throughout the execution of the loop.
866 As above, this is the case if the stride of the dataref is evenly
867 divisible by the vector size. */
868 if (!loop)
870 tree step = DR_STEP (dr);
871 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
873 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
875 if (vect_print_dump_info (REPORT_ALIGNMENT))
876 fprintf (vect_dump, "SLP: step doesn't divide the vector-size.");
877 misalign = NULL_TREE;
881 base = build_fold_indirect_ref (base_addr);
882 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
884 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
885 || !misalign)
887 if (vect_print_dump_info (REPORT_ALIGNMENT))
889 fprintf (vect_dump, "Unknown alignment for access: ");
890 print_generic_expr (vect_dump, base, TDF_SLIM);
892 return true;
895 if ((DECL_P (base)
896 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
897 alignment) >= 0)
898 || (TREE_CODE (base_addr) == SSA_NAME
899 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
900 TREE_TYPE (base_addr)))),
901 alignment) >= 0)
902 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
903 base_aligned = true;
904 else
905 base_aligned = false;
907 if (!base_aligned)
909 /* Do not change the alignment of global variables here if
910 flag_section_anchors is enabled as we already generated
911 RTL for other functions. Most global variables should
912 have been aligned during the IPA increase_alignment pass. */
913 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
914 || (TREE_STATIC (base) && flag_section_anchors))
916 if (vect_print_dump_info (REPORT_DETAILS))
918 fprintf (vect_dump, "can't force alignment of ref: ");
919 print_generic_expr (vect_dump, ref, TDF_SLIM);
921 return true;
924 /* Force the alignment of the decl.
925 NOTE: This is the only change to the code we make during
926 the analysis phase, before deciding to vectorize the loop. */
927 if (vect_print_dump_info (REPORT_DETAILS))
929 fprintf (vect_dump, "force alignment of ");
930 print_generic_expr (vect_dump, ref, TDF_SLIM);
933 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
934 DECL_USER_ALIGN (base) = 1;
937 /* At this point we assume that the base is aligned. */
938 gcc_assert (base_aligned
939 || (TREE_CODE (base) == VAR_DECL
940 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
942 /* If this is a backward running DR then the first access in the larger
943 vectype actually is N-1 elements before the address in the DR.
944 Adjust misalign accordingly. */
945 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
947 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
948 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
949 otherwise we wouldn't be here. */
950 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
951 /* PLUS because DR_STEP was negative. */
952 misalign = size_binop (PLUS_EXPR, misalign, offset);
955 /* Modulo alignment. */
956 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
958 if (!host_integerp (misalign, 1))
960 /* Negative or overflowed misalignment value. */
961 if (vect_print_dump_info (REPORT_DETAILS))
962 fprintf (vect_dump, "unexpected misalign value");
963 return false;
966 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
968 if (vect_print_dump_info (REPORT_DETAILS))
970 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
971 print_generic_expr (vect_dump, ref, TDF_SLIM);
974 return true;
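/* For illustration (a hypothetical example): for an access a[i + 3] into a
   16-byte aligned array of 4-byte ints with a 16-byte vectype, DR_INIT is
   12 and the modulo by the 16-byte alignment gives DR_MISALIGNMENT == 12.
   For a backward access (step -4, DR_INIT 0) the adjustment above first
   adds (4 - 1) * -4 = -12, and the floor-mod then yields a misalignment
   of 4.  */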
978 /* Function vect_compute_data_refs_alignment
980 Compute the misalignment of data references in the loop.
981 Return FALSE if a data reference is found that cannot be vectorized. */
983 static bool
984 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
985 bb_vec_info bb_vinfo)
987 VEC (data_reference_p, heap) *datarefs;
988 struct data_reference *dr;
989 unsigned int i;
991 if (loop_vinfo)
992 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
993 else
994 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
996 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
997 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
998 && !vect_compute_data_ref_alignment (dr))
1000 if (bb_vinfo)
1002 /* Mark unsupported statement as unvectorizable. */
1003 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1004 continue;
1006 else
1007 return false;
1010 return true;
1014 /* Function vect_update_misalignment_for_peel
1016 DR - the data reference whose misalignment is to be adjusted.
1017 DR_PEEL - the data reference whose misalignment is being made
1018 zero in the vector loop by the peel.
1019 NPEEL - the number of iterations in the peel loop if the misalignment
1020 of DR_PEEL is known at compile time. */
1022 static void
1023 vect_update_misalignment_for_peel (struct data_reference *dr,
1024 struct data_reference *dr_peel, int npeel)
1026 unsigned int i;
1027 VEC(dr_p,heap) *same_align_drs;
1028 struct data_reference *current_dr;
1029 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1030 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1031 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1032 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1034 /* For interleaved data accesses the step in the loop must be multiplied by
1035 the size of the interleaving group. */
1036 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1037 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1038 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
1039 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1041 /* It can be assumed that the data refs with the same alignment as dr_peel
1042 are aligned in the vector loop. */
1043 same_align_drs
1044 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1045 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1047 if (current_dr != dr)
1048 continue;
1049 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1050 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1051 SET_DR_MISALIGNMENT (dr, 0);
1052 return;
1055 if (known_alignment_for_access_p (dr)
1056 && known_alignment_for_access_p (dr_peel))
1058 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1059 int misal = DR_MISALIGNMENT (dr);
1060 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1061 misal += negative ? -npeel * dr_size : npeel * dr_size;
1062 misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
1063 SET_DR_MISALIGNMENT (dr, misal);
1064 return;
1067 if (vect_print_dump_info (REPORT_DETAILS))
1068 fprintf (vect_dump, "Setting misalignment to -1.");
1069 SET_DR_MISALIGNMENT (dr, -1);
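/* For illustration (a hypothetical example): with a forward DR of 4-byte
   elements, DR_MISALIGNMENT (dr) == 8, NPEEL == 2 and a 16-byte vectype,
   the update above computes (8 + 2 * 4) & 15 == 0, so DR becomes aligned
   by the peel; with NPEEL == 1 it would become 12.  */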
1073 /* Function vect_verify_datarefs_alignment
1075 Return TRUE if all data references in the loop can be
1076 handled with respect to alignment. */
1078 bool
1079 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1081 VEC (data_reference_p, heap) *datarefs;
1082 struct data_reference *dr;
1083 enum dr_alignment_support supportable_dr_alignment;
1084 unsigned int i;
1086 if (loop_vinfo)
1087 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1088 else
1089 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1091 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1093 gimple stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1096 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1097 continue;
1099 /* For interleaving, only the alignment of the first access matters.
1100 Skip statements marked as not vectorizable. */
1101 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
1102 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1103 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1104 continue;
1106 /* Strided loads perform only component accesses, alignment is
1107 irrelevant for them. */
1108 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1109 continue;
1111 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1112 if (!supportable_dr_alignment)
1114 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1116 if (DR_IS_READ (dr))
1117 fprintf (vect_dump,
1118 "not vectorized: unsupported unaligned load.");
1119 else
1120 fprintf (vect_dump,
1121 "not vectorized: unsupported unaligned store.");
1123 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1125 return false;
1127 if (supportable_dr_alignment != dr_aligned
1128 && vect_print_dump_info (REPORT_ALIGNMENT))
1129 fprintf (vect_dump, "Vectorizing an unaligned access.");
1131 return true;
1134 /* Given a memory reference EXP, return whether its alignment is less
1135 than its size. */
1137 static bool
1138 not_size_aligned (tree exp)
1140 if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp)), 1))
1141 return true;
1143 return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
1144 > get_object_alignment (exp));
1147 /* Function vector_alignment_reachable_p
1149 Return true if vector alignment for DR is reachable by peeling
1150 a few loop iterations. Return false otherwise. */
1152 static bool
1153 vector_alignment_reachable_p (struct data_reference *dr)
1155 gimple stmt = DR_STMT (dr);
1156 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1157 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1159 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1161 /* For interleaved access we peel only if the number of iterations in
1162 the prologue loop ({VF - misalignment}) is a multiple of the
1163 number of interleaved accesses. */
1164 int elem_size, mis_in_elements;
1165 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1167 /* FORNOW: handle only known alignment. */
1168 if (!known_alignment_for_access_p (dr))
1169 return false;
1171 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1172 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1174 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1175 return false;
1178 /* If the misalignment is known at compile time then allow peeling
1179 only if natural alignment is reachable through peeling. */
1180 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1182 HOST_WIDE_INT elmsize =
1183 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1184 if (vect_print_dump_info (REPORT_DETAILS))
1186 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1187 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1189 if (DR_MISALIGNMENT (dr) % elmsize)
1191 if (vect_print_dump_info (REPORT_DETAILS))
1192 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1193 return false;
1197 if (!known_alignment_for_access_p (dr))
1199 tree type = TREE_TYPE (DR_REF (dr));
1200 bool is_packed = not_size_aligned (DR_REF (dr));
1201 if (vect_print_dump_info (REPORT_DETAILS))
1202 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1203 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1204 return true;
1205 else
1206 return false;
1209 return true;
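/* For illustration (a hypothetical example): for a grouped access with a
   V4SImode vectype (4 elements of 4 bytes) and DR_MISALIGNMENT == 8,
   mis_in_elements is 2, so the prologue would peel 4 - 2 = 2 scalar
   iterations; that is acceptable for GROUP_SIZE 2 but not for GROUP_SIZE 3,
   where (4 - 2) % 3 != 0 makes alignment unreachable by peeling.  */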
1213 /* Calculate the cost of the memory access represented by DR. */
1215 static void
1216 vect_get_data_access_cost (struct data_reference *dr,
1217 unsigned int *inside_cost,
1218 unsigned int *outside_cost,
1219 stmt_vector_for_cost *body_cost_vec)
1221 gimple stmt = DR_STMT (dr);
1222 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1223 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1224 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1225 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1226 int ncopies = vf / nunits;
1228 if (DR_IS_READ (dr))
1229 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1230 NULL, body_cost_vec, false);
1231 else
1232 vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1234 if (vect_print_dump_info (REPORT_COST))
1235 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1236 "outside_cost = %d.", *inside_cost, *outside_cost);
1240 static hashval_t
1241 vect_peeling_hash (const void *elem)
1243 const struct _vect_peel_info *peel_info;
1245 peel_info = (const struct _vect_peel_info *) elem;
1246 return (hashval_t) peel_info->npeel;
1250 static int
1251 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1253 const struct _vect_peel_info *a, *b;
1255 a = (const struct _vect_peel_info *) elem1;
1256 b = (const struct _vect_peel_info *) elem2;
1257 return (a->npeel == b->npeel);
1261 /* Insert DR into peeling hash table with NPEEL as key. */
1263 static void
1264 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1265 int npeel)
1267 struct _vect_peel_info elem, *slot;
1268 void **new_slot;
1269 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1271 elem.npeel = npeel;
1272 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1273 &elem);
1274 if (slot)
1275 slot->count++;
1276 else
1278 slot = XNEW (struct _vect_peel_info);
1279 slot->npeel = npeel;
1280 slot->dr = dr;
1281 slot->count = 1;
1282 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1283 INSERT);
1284 *new_slot = slot;
1287 if (!supportable_dr_alignment && !flag_vect_cost_model)
1288 slot->count += VECT_MAX_COST;
1292 /* Traverse the peeling hash table to find the peeling option that aligns
1293 the maximum number of data accesses. */
1295 static int
1296 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1298 vect_peel_info elem = (vect_peel_info) *slot;
1299 vect_peel_extended_info max = (vect_peel_extended_info) data;
1301 if (elem->count > max->peel_info.count
1302 || (elem->count == max->peel_info.count
1303 && max->peel_info.npeel > elem->npeel))
1305 max->peel_info.npeel = elem->npeel;
1306 max->peel_info.count = elem->count;
1307 max->peel_info.dr = elem->dr;
1310 return 1;
1314 /* Traverse peeling hash table and calculate cost for each peeling option.
1315 Find the one with the lowest cost. */
1317 static int
1318 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1320 vect_peel_info elem = (vect_peel_info) *slot;
1321 vect_peel_extended_info min = (vect_peel_extended_info) data;
1322 int save_misalignment, dummy;
1323 unsigned int inside_cost = 0, outside_cost = 0, i;
1324 gimple stmt = DR_STMT (elem->dr);
1325 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1326 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1327 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1328 struct data_reference *dr;
1329 stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
1330 int single_iter_cost;
1332 prologue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1333 body_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1334 epilogue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1336 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1338 stmt = DR_STMT (dr);
1339 stmt_info = vinfo_for_stmt (stmt);
1340 /* For interleaving, only the alignment of the first access
1341 matters. */
1342 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1343 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1344 continue;
1346 save_misalignment = DR_MISALIGNMENT (dr);
1347 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1348 vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
1349 &body_cost_vec);
1350 SET_DR_MISALIGNMENT (dr, save_misalignment);
1353 single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
1354 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
1355 &dummy, single_iter_cost,
1356 &prologue_cost_vec,
1357 &epilogue_cost_vec);
1359 /* Prologue and epilogue costs are added to the target model later.
1360 These costs depend only on the scalar iteration cost, the
1361 number of peeling iterations finally chosen, and the number of
1362 misaligned statements. So discard the information found here. */
1363 VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);
1364 VEC_free (stmt_info_for_cost, heap, epilogue_cost_vec);
1366 if (inside_cost < min->inside_cost
1367 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1369 min->inside_cost = inside_cost;
1370 min->outside_cost = outside_cost;
1371 VEC_free (stmt_info_for_cost, heap, min->body_cost_vec);
1372 min->body_cost_vec = body_cost_vec;
1373 min->peel_info.dr = elem->dr;
1374 min->peel_info.npeel = elem->npeel;
1376 else
1377 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1379 return 1;
1383 /* Choose best peeling option by traversing peeling hash table and either
1384 choosing an option with the lowest cost (if cost model is enabled) or the
1385 option that aligns as many accesses as possible. */
1387 static struct data_reference *
1388 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1389 unsigned int *npeel,
1390 stmt_vector_for_cost *body_cost_vec)
1392 struct _vect_peel_extended_info res;
1394 res.peel_info.dr = NULL;
1395 res.body_cost_vec = NULL;
1397 if (flag_vect_cost_model)
1399 res.inside_cost = INT_MAX;
1400 res.outside_cost = INT_MAX;
1401 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1402 vect_peeling_hash_get_lowest_cost, &res);
1404 else
1406 res.peel_info.count = 0;
1407 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1408 vect_peeling_hash_get_most_frequent, &res);
1411 *npeel = res.peel_info.npeel;
1412 *body_cost_vec = res.body_cost_vec;
1413 return res.peel_info.dr;
1417 /* Function vect_enhance_data_refs_alignment
1419 This pass will use loop versioning and loop peeling in order to enhance
1420 the alignment of data references in the loop.
1422 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1423 original loop is to be vectorized. Any other loops that are created by
1424 the transformations performed in this pass are not supposed to be
1425 vectorized. This restriction will be relaxed.
1427 This pass will require a cost model to guide it whether to apply peeling
1428 or versioning or a combination of the two. For example, the scheme that
1429 Intel uses when given a loop with several memory accesses is as follows:
1430 choose one memory access ('p') whose alignment you want to force by
1431 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1432 other accesses are not necessarily aligned, or (2) use loop versioning to
1433 generate one loop in which all accesses are aligned, and another loop in
1434 which only 'p' is necessarily aligned.
1436 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1437 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1438 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1440 Devising a cost model is the most critical aspect of this work. It will
1441 guide us on which access to peel for, whether to use loop versioning, how
1442 many versions to create, etc. The cost model will probably consist of
1443 generic considerations as well as target specific considerations (on
1444 powerpc for example, misaligned stores are more painful than misaligned
1445 loads).
1447 Here are the general steps involved in alignment enhancements:
1449 -- original loop, before alignment analysis:
1450 for (i=0; i<N; i++){
1451 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1452 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1455 -- After vect_compute_data_refs_alignment:
1456 for (i=0; i<N; i++){
1457 x = q[i]; # DR_MISALIGNMENT(q) = 3
1458 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1461 -- Possibility 1: we do loop versioning:
1462 if (p is aligned) {
1463 for (i=0; i<N; i++){ # loop 1A
1464 x = q[i]; # DR_MISALIGNMENT(q) = 3
1465 p[i] = y; # DR_MISALIGNMENT(p) = 0
1468 else {
1469 for (i=0; i<N; i++){ # loop 1B
1470 x = q[i]; # DR_MISALIGNMENT(q) = 3
1471 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1475 -- Possibility 2: we do loop peeling:
1476 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1477 x = q[i];
1478 p[i] = y;
1480 for (i = 3; i < N; i++){ # loop 2A
1481 x = q[i]; # DR_MISALIGNMENT(q) = 0
1482 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1485 -- Possibility 3: combination of loop peeling and versioning:
1486 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1487 x = q[i];
1488 p[i] = y;
1490 if (p is aligned) {
1491 for (i = 3; i<N; i++){ # loop 3A
1492 x = q[i]; # DR_MISALIGNMENT(q) = 0
1493 p[i] = y; # DR_MISALIGNMENT(p) = 0
1496 else {
1497 for (i = 3; i<N; i++){ # loop 3B
1498 x = q[i]; # DR_MISALIGNMENT(q) = 0
1499 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1503 These loops are later passed to loop_transform to be vectorized. The
1504 vectorizer will use the alignment information to guide the transformation
1505 (whether to generate regular loads/stores, or with special handling for
1506 misalignment). */
1508 bool
1509 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1511 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1512 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1513 enum dr_alignment_support supportable_dr_alignment;
1514 struct data_reference *dr0 = NULL, *first_store = NULL;
1515 struct data_reference *dr;
1516 unsigned int i, j;
1517 bool do_peeling = false;
1518 bool do_versioning = false;
1519 bool stat;
1520 gimple stmt;
1521 stmt_vec_info stmt_info;
1522 int vect_versioning_for_alias_required;
1523 unsigned int npeel = 0;
1524 bool all_misalignments_unknown = true;
1525 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1526 unsigned possible_npeel_number = 1;
1527 tree vectype;
1528 unsigned int nelements, mis, same_align_drs_max = 0;
1529 stmt_vector_for_cost body_cost_vec = NULL;
1531 if (vect_print_dump_info (REPORT_DETAILS))
1532 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1534 /* While cost model enhancements are expected in the future, the high level
1535 view of the code at this time is as follows:
1537 A) If there is a misaligned access then see if peeling to align
1538 this access can make all data references satisfy
1539 vect_supportable_dr_alignment. If so, update data structures
1540 as needed and return true.
1542 B) If peeling wasn't possible and there is a data reference with an
1543 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1544 then see if loop versioning checks can be used to make all data
1545 references satisfy vect_supportable_dr_alignment. If so, update
1546 data structures as needed and return true.
1548 C) If neither peeling nor versioning were successful then return false if
1549 any data reference does not satisfy vect_supportable_dr_alignment.
1551 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1553 Note, Possibility 3 above (which is peeling and versioning together) is not
1554 being done at this time. */
1556 /* (1) Peeling to force alignment. */
1558 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1559 Considerations:
1560 + How many accesses will become aligned due to the peeling
1561 - How many accesses will become unaligned due to the peeling,
1562 and the cost of misaligned accesses.
1563 - The cost of peeling (the extra runtime checks, the increase
1564 in code size). */
1566 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1568 stmt = DR_STMT (dr);
1569 stmt_info = vinfo_for_stmt (stmt);
1571 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1572 continue;
1574 /* For interleaving, only the alignment of the first access
1575 matters. */
1576 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1577 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1578 continue;
1580 /* FORNOW: Any strided load prevents peeling. The induction
1581 variable analysis will fail when the prologue loop is generated,
1582 and so we can't generate the new base for the pointer. */
1583 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1585 if (vect_print_dump_info (REPORT_DETAILS))
1586 fprintf (vect_dump, "strided load prevents peeling");
1587 do_peeling = false;
1588 break;
1591 /* For invariant accesses there is nothing to enhance. */
1592 if (integer_zerop (DR_STEP (dr)))
1593 continue;
1595 /* Strided loads perform only component accesses, alignment is
1596 irrelevant for them. */
1597 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1598 continue;
1600 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1601 do_peeling = vector_alignment_reachable_p (dr);
1602 if (do_peeling)
1604 if (known_alignment_for_access_p (dr))
1606 unsigned int npeel_tmp;
1607 bool negative = tree_int_cst_compare (DR_STEP (dr),
1608 size_zero_node) < 0;
1610 /* Save info about DR in the hash table. */
1611 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1612 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1613 htab_create (1, vect_peeling_hash,
1614 vect_peeling_hash_eq, free);
1616 vectype = STMT_VINFO_VECTYPE (stmt_info);
1617 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1618 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1619 TREE_TYPE (DR_REF (dr))));
1620 npeel_tmp = (negative
1621 ? (mis - nelements) : (nelements - mis))
1622 & (nelements - 1);
1624 /* For multiple types, it is possible that the bigger type access
1625 will have more than one peeling option. E.g., a loop with two
1626 types: one of size (vector size / 4), and the other one of
1627 size (vector size / 8). The vectorization factor will be 8. If both
1628 accesses are misaligned by 3, the first one needs one scalar
1629 iteration to be aligned, and the second one needs 5. But
1630 the first one will also be aligned by peeling 5 scalar
1631 iterations, and in that case both accesses will be aligned.
1632 Hence, except for the immediate peeling amount, we also want
1633 to try to add a full vector size, as long as we don't exceed the
1634 vectorization factor.
1635 We do this automatically for the cost model, since we calculate
1636 the cost for every peeling option. */
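/* For illustration (a hypothetical example): with a 16-byte vectype of
   4-byte ints (nelements == 4) and DR_MISALIGNMENT == 12, mis is 3
   elements and npeel_tmp starts at (4 - 3) & 3 == 1.  Without the cost
   model and with vf == 8, possible_npeel_number is 2, so both peeling by 1
   and by 1 + 4 = 5 iterations are recorded in the hash table.  */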
1637 if (!flag_vect_cost_model)
1638 possible_npeel_number = vf /nelements;
1640 /* Handle the aligned case. We may decide to align some other
1641 access, making DR unaligned. */
1642 if (DR_MISALIGNMENT (dr) == 0)
1644 npeel_tmp = 0;
1645 if (!flag_vect_cost_model)
1646 possible_npeel_number++;
1649 for (j = 0; j < possible_npeel_number; j++)
1651 gcc_assert (npeel_tmp <= vf);
1652 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1653 npeel_tmp += nelements;
1656 all_misalignments_unknown = false;
1657 /* Data-ref that was chosen for the case that all the
1658 misalignments are unknown is not relevant anymore, since we
1659 have a data-ref with known alignment. */
1660 dr0 = NULL;
1662 else
1664 /* If we don't know all the misalignment values, we prefer
1665 peeling for the data-ref that has the maximum number of data-refs
1666 with the same alignment, unless the target prefers to align
1667 stores over loads. */
1668 if (all_misalignments_unknown)
1670 if (same_align_drs_max < VEC_length (dr_p,
1671 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1672 || !dr0)
1674 same_align_drs_max = VEC_length (dr_p,
1675 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1676 dr0 = dr;
1679 if (!first_store && DR_IS_WRITE (dr))
1680 first_store = dr;
1683 /* If there are both known and unknown misaligned accesses in the
1684 loop, we choose peeling amount according to the known
1685 accesses. */
1688 if (!supportable_dr_alignment)
1690 dr0 = dr;
1691 if (!first_store && DR_IS_WRITE (dr))
1692 first_store = dr;
1696 else
1698 if (!aligned_access_p (dr))
1700 if (vect_print_dump_info (REPORT_DETAILS))
1701 fprintf (vect_dump, "vector alignment may not be reachable");
1703 break;
1708 vect_versioning_for_alias_required
1709 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1711 /* Temporarily, if versioning for alias is required, we disable peeling
1712 until we support peeling and versioning. Often peeling for alignment
1713 will require peeling for loop-bound, which in turn requires that we
1714 know how to adjust the loop ivs after the loop. */
1715 if (vect_versioning_for_alias_required
1716 || !vect_can_advance_ivs_p (loop_vinfo)
1717 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1718 do_peeling = false;
1720 if (do_peeling && all_misalignments_unknown
1721 && vect_supportable_dr_alignment (dr0, false))
1724 /* Check whether the target requires us to prefer stores over loads, i.e., if
1725 misaligned stores are more expensive than misaligned loads (taking
1726 drs with same alignment into account). */
1727 if (first_store && DR_IS_READ (dr0))
1729 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1730 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1731 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1732 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1733 stmt_vector_for_cost dummy = VEC_alloc (stmt_info_for_cost, heap, 2);
1735 vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1736 &dummy);
1737 vect_get_data_access_cost (first_store, &store_inside_cost,
1738 &store_outside_cost, &dummy);
1740 VEC_free (stmt_info_for_cost, heap, dummy);
1742 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1743 aligning the load DR0). */
1744 load_inside_penalty = store_inside_cost;
1745 load_outside_penalty = store_outside_cost;
1746 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1747 (vinfo_for_stmt (DR_STMT (first_store))),
1748 i, dr);
1749 i++)
1750 if (DR_IS_READ (dr))
1752 load_inside_penalty += load_inside_cost;
1753 load_outside_penalty += load_outside_cost;
1755 else
1757 load_inside_penalty += store_inside_cost;
1758 load_outside_penalty += store_outside_cost;
1761 /* Calculate the penalty for leaving DR0 unaligned (by
1762 aligning the FIRST_STORE). */
1763 store_inside_penalty = load_inside_cost;
1764 store_outside_penalty = load_outside_cost;
1765 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1766 (vinfo_for_stmt (DR_STMT (dr0))),
1767 i, dr);
1768 i++)
1769 if (DR_IS_READ (dr))
1771 store_inside_penalty += load_inside_cost;
1772 store_outside_penalty += load_outside_cost;
1774 else
1776 store_inside_penalty += store_inside_cost;
1777 store_outside_penalty += store_outside_cost;
1780 if (load_inside_penalty > store_inside_penalty
1781 || (load_inside_penalty == store_inside_penalty
1782 && load_outside_penalty > store_outside_penalty))
1783 dr0 = first_store;
1786 /* In case there are only loads with different unknown misalignments, use
1787 peeling only if it may help to align other accesses in the loop. */
1788 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1789 (vinfo_for_stmt (DR_STMT (dr0))))
1790 && vect_supportable_dr_alignment (dr0, false)
1791 != dr_unaligned_supported)
1792 do_peeling = false;
1795 if (do_peeling && !dr0)
1797 /* Peeling is possible, but no data access requires alignment in order
1798 to be supported. So we try to choose the best possible peeling. */
1800 /* We should get here only if there are drs with known misalignment. */
1801 gcc_assert (!all_misalignments_unknown);
1803 /* Choose the best peeling from the hash table. */
1804 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
1805 &body_cost_vec);
1806 if (!dr0 || !npeel)
1807 do_peeling = false;
1810 if (do_peeling)
1812 stmt = DR_STMT (dr0);
1813 stmt_info = vinfo_for_stmt (stmt);
1814 vectype = STMT_VINFO_VECTYPE (stmt_info);
1815 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1817 if (known_alignment_for_access_p (dr0))
1819 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1820 size_zero_node) < 0;
1821 if (!npeel)
1823 /* Since it's known at compile time, compute the number of
1824 iterations in the peeled loop (the peeling factor) for use in
1825 updating DR_MISALIGNMENT values. The peeling factor is the
1826 vectorization factor minus the misalignment as an element
1827 count. */
1828 mis = DR_MISALIGNMENT (dr0);
1829 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1830 npeel = ((negative ? mis - nelements : nelements - mis)
1831 & (nelements - 1));
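/* A worked example with assumed numbers: for a V4SI access (4 elements
   of 4 bytes) with a DR_MISALIGNMENT of 8 bytes, mis = 8 / 4 = 2
   elements, so with a non-negative step npeel = (4 - 2) & 3 = 2 scalar
   iterations are peeled.  */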
1834 /* For an interleaved data access, every iteration accesses all the
1835 members of the group; therefore we divide the number of iterations
1836 by the group size. */
1837 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1838 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1839 npeel /= GROUP_SIZE (stmt_info);
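/* E.g. (illustrative), with a group size of 2 each scalar iteration
   covers two elements, so an npeel of 2 elements amounts to peeling a
   single scalar iteration.  */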
1841 if (vect_print_dump_info (REPORT_DETAILS))
1842 fprintf (vect_dump, "Try peeling by %d", npeel);
1845 /* Ensure that all data refs can be vectorized after the peel. */
1846 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1848 int save_misalignment;
1850 if (dr == dr0)
1851 continue;
1853 stmt = DR_STMT (dr);
1854 stmt_info = vinfo_for_stmt (stmt);
1855 /* For interleaving, only the alignment of the first access
1856 matters. */
1857 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1858 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1859 continue;
1861 /* Strided loads perform only component accesses; alignment is
1862 irrelevant for them. */
1863 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1864 continue;
1866 save_misalignment = DR_MISALIGNMENT (dr);
1867 vect_update_misalignment_for_peel (dr, dr0, npeel);
1868 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1869 SET_DR_MISALIGNMENT (dr, save_misalignment);
1871 if (!supportable_dr_alignment)
1873 do_peeling = false;
1874 break;
1878 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1880 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1881 if (!stat)
1882 do_peeling = false;
1883 else
1885 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1886 return stat;
1890 if (do_peeling)
1892 stmt_info_for_cost *si;
1893 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
1895 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1896 If the misalignment of DR_i is identical to that of dr0 then set
1897 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1898 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1899 by the peeling factor times the element size of DR_i (MOD the
1900 vectorization factor times the size). Otherwise, the
1901 misalignment of DR_i must be set to unknown. */
1902 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1903 if (dr != dr0)
1904 vect_update_misalignment_for_peel (dr, dr0, npeel);
1906 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1907 if (npeel)
1908 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1909 else
1910 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1911 SET_DR_MISALIGNMENT (dr0, 0);
1912 if (vect_print_dump_info (REPORT_ALIGNMENT))
1913 fprintf (vect_dump, "Alignment of access forced using peeling.");
1915 if (vect_print_dump_info (REPORT_DETAILS))
1916 fprintf (vect_dump, "Peeling for alignment will be applied.");
1918 /* We've delayed passing the inside-loop peeling costs to the
1919 target cost model until we were sure peeling would happen.
1920 Do so now. */
1921 if (body_cost_vec)
1923 FOR_EACH_VEC_ELT (stmt_info_for_cost, body_cost_vec, i, si)
1925 struct _stmt_vec_info *stmt_info
1926 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1927 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1928 si->misalign, vect_body);
1930 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1933 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1934 gcc_assert (stat);
1935 return stat;
1939 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1941 /* (2) Versioning to force alignment. */
1943 /* Try versioning if:
1944 1) flag_tree_vect_loop_version is TRUE
1945 2) optimize loop for speed
1946 3) there is at least one unsupported misaligned data ref with an unknown
1947 misalignment, and
1948 4) all misaligned data refs with a known misalignment are supported, and
1949 5) the number of runtime alignment checks is within reason. */
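/* Conceptually (a sketch, not literal code from this pass), the
   versioned loop is guarded by a runtime test of roughly the form

     if (((addr_1 | addr_2 | ... | addr_n) & mask) == 0)
       vectorized_loop;
     else
       scalar_loop;

   where the addresses come from the statements collected in
   LOOP_VINFO_MAY_MISALIGN_STMTS below and MASK is LOOP_VINFO_PTR_MASK.  */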
1951 do_versioning =
1952 flag_tree_vect_loop_version
1953 && optimize_loop_nest_for_speed_p (loop)
1954 && (!loop->inner); /* FORNOW */
1956 if (do_versioning)
1958 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1960 stmt = DR_STMT (dr);
1961 stmt_info = vinfo_for_stmt (stmt);
1963 /* For interleaving, only the alignment of the first access
1964 matters. */
1965 if (aligned_access_p (dr)
1966 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1967 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1968 continue;
1970 /* Strided loads perform only component accesses; alignment is
1971 irrelevant for them. */
1972 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1973 continue;
1975 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1977 if (!supportable_dr_alignment)
1979 gimple stmt;
1980 int mask;
1981 tree vectype;
1983 if (known_alignment_for_access_p (dr)
1984 || VEC_length (gimple,
1985 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1986 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1988 do_versioning = false;
1989 break;
1992 stmt = DR_STMT (dr);
1993 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1994 gcc_assert (vectype);
1996 /* The rightmost bits of an aligned address must be zeros.
1997 Construct the mask needed for this test. For example,
1998 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1999 mask must be 15 = 0xf. */
2000 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2002 /* FORNOW: use the same mask to test all potentially unaligned
2003 references in the loop. The vectorizer currently supports
2004 a single vector size, see the reference to
2005 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2006 vectorization factor is computed. */
2007 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2008 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2009 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2010 VEC_safe_push (gimple, heap,
2011 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2012 DR_STMT (dr));
2016 /* Versioning requires at least one misaligned data reference. */
2017 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2018 do_versioning = false;
2019 else if (!do_versioning)
2020 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2023 if (do_versioning)
2025 VEC(gimple,heap) *may_misalign_stmts
2026 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2027 gimple stmt;
2029 /* It can now be assumed that the data references in the statements
2030 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2031 of the loop being vectorized. */
2032 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
2034 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2035 dr = STMT_VINFO_DATA_REF (stmt_info);
2036 SET_DR_MISALIGNMENT (dr, 0);
2037 if (vect_print_dump_info (REPORT_ALIGNMENT))
2038 fprintf (vect_dump, "Alignment of access forced using versioning.");
2041 if (vect_print_dump_info (REPORT_DETAILS))
2042 fprintf (vect_dump, "Versioning for alignment will be applied.");
2044 /* Peeling and versioning can't be done together at this time. */
2045 gcc_assert (! (do_peeling && do_versioning));
2047 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2048 gcc_assert (stat);
2049 return stat;
2052 /* This point is reached if neither peeling nor versioning is being done. */
2053 gcc_assert (! (do_peeling || do_versioning));
2055 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2056 return stat;
2060 /* Function vect_find_same_alignment_drs.
2062 Update group and alignment relations according to the chosen
2063 vectorization factor. */
2065 static void
2066 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
2067 loop_vec_info loop_vinfo)
2069 unsigned int i;
2070 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2071 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2072 struct data_reference *dra = DDR_A (ddr);
2073 struct data_reference *drb = DDR_B (ddr);
2074 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2075 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2076 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
2077 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
2078 lambda_vector dist_v;
2079 unsigned int loop_depth;
2081 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2082 return;
2084 if (dra == drb)
2085 return;
2087 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2088 return;
2090 /* Loop-based vectorization and known data dependence. */
2091 if (DDR_NUM_DIST_VECTS (ddr) == 0)
2092 return;
2094 /* Data-dependence analysis reports a distance vector of zero
2095 for data-references that overlap only in the first iteration
2096 but have steps of different sign (see PR45764).
2097 So as a sanity check require equal DR_STEP. */
2098 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2099 return;
2101 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
2102 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
2104 int dist = dist_v[loop_depth];
2106 if (vect_print_dump_info (REPORT_DR_DETAILS))
2107 fprintf (vect_dump, "dependence distance = %d.", dist);
2109 /* Same loop iteration. */
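/* The second case below, dist % VF == 0, covers e.g. (numbers assumed)
   VF = 4 and a dependence distance of 8: with equal-sized elements the
   two references are a whole number of vector strides apart and
   therefore share the same misalignment.  */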
2110 if (dist == 0
2111 || (dist % vectorization_factor == 0 && dra_size == drb_size))
2113 /* Two references with distance zero have the same alignment. */
2114 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
2115 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
2116 if (vect_print_dump_info (REPORT_ALIGNMENT))
2117 fprintf (vect_dump, "accesses have the same alignment.");
2118 if (vect_print_dump_info (REPORT_DR_DETAILS))
2120 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
2121 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
2122 fprintf (vect_dump, " and ");
2123 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
2130 /* Function vect_analyze_data_refs_alignment
2132 Analyze the alignment of the data-references in the loop.
2133 Return FALSE if a data reference is found that cannot be vectorized. */
2135 bool
2136 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2137 bb_vec_info bb_vinfo)
2139 if (vect_print_dump_info (REPORT_DETAILS))
2140 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2142 /* Mark groups of data references with same alignment using
2143 data dependence information. */
2144 if (loop_vinfo)
2146 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2147 struct data_dependence_relation *ddr;
2148 unsigned int i;
2150 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2151 vect_find_same_alignment_drs (ddr, loop_vinfo);
2154 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2156 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2157 fprintf (vect_dump,
2158 "not vectorized: can't calculate alignment for data ref.");
2159 return false;
2162 return true;
2166 /* Analyze groups of accesses: check that DR belongs to a group of
2167 accesses of legal size, step, etc. Detect gaps, single element
2168 interleaving, and other special cases. Set grouped access info.
2169 Collect groups of strided stores for further use in SLP analysis. */
2171 static bool
2172 vect_analyze_group_access (struct data_reference *dr)
2174 tree step = DR_STEP (dr);
2175 tree scalar_type = TREE_TYPE (DR_REF (dr));
2176 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2177 gimple stmt = DR_STMT (dr);
2178 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2179 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2180 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2181 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2182 HOST_WIDE_INT groupsize, last_accessed_element = 1;
2183 bool slp_impossible = false;
2184 struct loop *loop = NULL;
2186 if (loop_vinfo)
2187 loop = LOOP_VINFO_LOOP (loop_vinfo);
2189 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2190 size of the interleaving group (including gaps). */
2191 groupsize = dr_step / type_size;
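/* For illustration (values assumed): 4-byte elements accessed with a
   DR_STEP of 16 bytes give groupsize = 16 / 4 = 4, i.e. the group spans
   four element slots per iteration, whether or not all of them are
   actually accessed.  */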
2193 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2194 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2196 /* Check whether this DR is a part of interleaving, and is a single
2197 element of the group that is accessed in the loop. */
2199 /* Gaps are supported only for loads. STEP must be a multiple of the type
2200 size. The size of the group must be a power of 2. */
2201 if (DR_IS_READ (dr)
2202 && (dr_step % type_size) == 0
2203 && groupsize > 0
2204 && exact_log2 (groupsize) != -1)
2206 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2207 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2208 if (vect_print_dump_info (REPORT_DR_DETAILS))
2210 fprintf (vect_dump, "Detected single element interleaving ");
2211 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2212 fprintf (vect_dump, " step ");
2213 print_generic_expr (vect_dump, step, TDF_SLIM);
2216 if (loop_vinfo)
2218 if (vect_print_dump_info (REPORT_DETAILS))
2219 fprintf (vect_dump, "Data access with gaps requires scalar "
2220 "epilogue loop");
2221 if (loop->inner)
2223 if (vect_print_dump_info (REPORT_DETAILS))
2224 fprintf (vect_dump, "Peeling for outer loop is not"
2225 " supported");
2226 return false;
2229 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2232 return true;
2235 if (vect_print_dump_info (REPORT_DETAILS))
2237 fprintf (vect_dump, "not consecutive access ");
2238 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2241 if (bb_vinfo)
2243 /* Mark the statement as unvectorizable. */
2244 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2245 return true;
2248 return false;
2251 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2253 /* First stmt in the interleaving chain. Check the chain. */
2254 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2255 struct data_reference *data_ref = dr;
2256 unsigned int count = 1;
2257 tree next_step;
2258 tree prev_init = DR_INIT (data_ref);
2259 gimple prev = stmt;
2260 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2262 while (next)
2264 /* Skip same data-refs. In case two or more stmts share a
2265 data-ref (supported only for loads), we vectorize only the first
2266 stmt, and the rest get their vectorized loads from the first
2267 one. */
2268 if (!tree_int_cst_compare (DR_INIT (data_ref),
2269 DR_INIT (STMT_VINFO_DATA_REF (
2270 vinfo_for_stmt (next)))))
2272 if (DR_IS_WRITE (data_ref))
2274 if (vect_print_dump_info (REPORT_DETAILS))
2275 fprintf (vect_dump, "Two store stmts share the same dr.");
2276 return false;
2279 /* Check that there are no load-store dependences for these loads,
2280 to prevent a load-store-load sequence to the same location. */
2281 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2282 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2284 if (vect_print_dump_info (REPORT_DETAILS))
2285 fprintf (vect_dump,
2286 "READ_WRITE dependence in interleaving.");
2287 return false;
2290 /* For loads, use the same data-ref load. */
2291 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2293 prev = next;
2294 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2295 continue;
2298 prev = next;
2300 /* Check that all the accesses have the same STEP. */
2301 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2302 if (tree_int_cst_compare (step, next_step))
2304 if (vect_print_dump_info (REPORT_DETAILS))
2305 fprintf (vect_dump, "not consecutive access in interleaving");
2306 return false;
2309 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2310 /* Check that the distance between two accesses is equal to the type
2311 size. Otherwise, we have gaps. */
2312 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2313 - TREE_INT_CST_LOW (prev_init)) / type_size;
2314 if (diff != 1)
2316 /* FORNOW: SLP of accesses with gaps is not supported. */
2317 slp_impossible = true;
2318 if (DR_IS_WRITE (data_ref))
2320 if (vect_print_dump_info (REPORT_DETAILS))
2321 fprintf (vect_dump, "interleaved store with gaps");
2322 return false;
2325 gaps += diff - 1;
2328 last_accessed_element += diff;
2330 /* Store the gap from the previous member of the group. If there is no
2331 gap in the access, GROUP_GAP is always 1. */
2332 GROUP_GAP (vinfo_for_stmt (next)) = diff;
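/* Illustrative case (access pattern assumed): for consecutive group
   members at byte offsets 0 and 8 with 4-byte elements, diff =
   (8 - 0) / 4 = 2, so GROUP_GAP of the second member is 2 and one
   element slot between them is skipped (gaps = 1).  */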
2334 prev_init = DR_INIT (data_ref);
2335 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2336 /* Count the number of data-refs in the chain. */
2337 count++;
2340 /* COUNT is the number of accesses found; we multiply it by the size of
2341 the type to get COUNT_IN_BYTES. */
2342 count_in_bytes = type_size * count;
2344 /* Check that the size of the interleaving (including gaps) is not
2345 greater than STEP. */
2346 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2348 if (vect_print_dump_info (REPORT_DETAILS))
2350 fprintf (vect_dump, "interleaving size is greater than step for ");
2351 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2353 return false;
2356 /* Check that the size of the interleaving is equal to STEP for stores,
2357 i.e., that there are no gaps. */
2358 if (dr_step && dr_step != count_in_bytes)
2360 if (DR_IS_READ (dr))
2362 slp_impossible = true;
2363 /* There is a gap after the last load in the group. This gap is the
2364 difference between the groupsize and the number of elements.
2365 When there is no gap, this difference should be 0. */
2366 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
2368 else
2370 if (vect_print_dump_info (REPORT_DETAILS))
2371 fprintf (vect_dump, "interleaved store with gaps");
2372 return false;
2376 /* Check that STEP is a multiple of type size. */
2377 if (dr_step && (dr_step % type_size) != 0)
2379 if (vect_print_dump_info (REPORT_DETAILS))
2381 fprintf (vect_dump, "step is not a multiple of type size: step ");
2382 print_generic_expr (vect_dump, step, TDF_SLIM);
2383 fprintf (vect_dump, " size ");
2384 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2385 TDF_SLIM);
2387 return false;
2390 if (groupsize == 0)
2391 groupsize = count;
2393 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2394 if (vect_print_dump_info (REPORT_DETAILS))
2395 fprintf (vect_dump, "Detected interleaving of size %d", (int)groupsize);
2397 /* SLP: create an SLP data structure for every interleaving group of
2398 stores for further analysis in vect_analyse_slp. */
2399 if (DR_IS_WRITE (dr) && !slp_impossible)
2401 if (loop_vinfo)
2402 VEC_safe_push (gimple, heap, LOOP_VINFO_GROUPED_STORES (loop_vinfo),
2403 stmt);
2404 if (bb_vinfo)
2405 VEC_safe_push (gimple, heap, BB_VINFO_GROUPED_STORES (bb_vinfo),
2406 stmt);
2409 /* There is a gap at the end of the group. */
2410 if (groupsize - last_accessed_element > 0 && loop_vinfo)
2412 if (vect_print_dump_info (REPORT_DETAILS))
2413 fprintf (vect_dump, "Data access with gaps requires scalar "
2414 "epilogue loop");
2415 if (loop->inner)
2417 if (vect_print_dump_info (REPORT_DETAILS))
2418 fprintf (vect_dump, "Peeling for outer loop is not supported");
2419 return false;
2422 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2426 return true;
2430 /* Analyze the access pattern of the data-reference DR.
2431 In case of non-consecutive accesses call vect_analyze_group_access() to
2432 analyze groups of accesses. */
2434 static bool
2435 vect_analyze_data_ref_access (struct data_reference *dr)
2437 tree step = DR_STEP (dr);
2438 tree scalar_type = TREE_TYPE (DR_REF (dr));
2439 gimple stmt = DR_STMT (dr);
2440 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2441 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2442 struct loop *loop = NULL;
2444 if (loop_vinfo)
2445 loop = LOOP_VINFO_LOOP (loop_vinfo);
2447 if (loop_vinfo && !step)
2449 if (vect_print_dump_info (REPORT_DETAILS))
2450 fprintf (vect_dump, "bad data-ref access in loop");
2451 return false;
2454 /* Allow invariant loads in loops. */
2455 if (loop_vinfo && integer_zerop (step))
2457 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2458 return DR_IS_READ (dr);
2461 if (loop && nested_in_vect_loop_p (loop, stmt))
2463 /* Interleaved accesses are not yet supported within outer-loop
2464 vectorization for references in the inner-loop. */
2465 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2467 /* For the rest of the analysis we use the outer-loop step. */
2468 step = STMT_VINFO_DR_STEP (stmt_info);
2469 if (integer_zerop (step))
2471 if (vect_print_dump_info (REPORT_ALIGNMENT))
2472 fprintf (vect_dump, "zero step in outer loop.");
2473 if (DR_IS_READ (dr))
2474 return true;
2475 else
2476 return false;
2480 /* Consecutive? */
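/* E.g. (illustration), for 4-byte ints a DR_STEP of 4 is a consecutive
   access, and a DR_STEP of -4 is accepted below as a consecutive access
   in reverse order.  */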
2481 if (TREE_CODE (step) == INTEGER_CST)
2483 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2484 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2485 || (dr_step < 0
2486 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2488 /* Mark that it is not interleaving. */
2489 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2490 return true;
2494 if (loop && nested_in_vect_loop_p (loop, stmt))
2496 if (vect_print_dump_info (REPORT_ALIGNMENT))
2497 fprintf (vect_dump, "grouped access in outer loop.");
2498 return false;
2501 /* Assume this is a DR handled by the non-constant strided load case. */
2502 if (TREE_CODE (step) != INTEGER_CST)
2503 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2505 /* Non-consecutive access - check whether it is part of an interleaving group. */
2506 return vect_analyze_group_access (dr);
2510 /* Function vect_analyze_data_ref_accesses.
2512 Analyze the access pattern of all the data references in the loop.
2514 FORNOW: the only access pattern that is considered vectorizable is a
2515 simple step 1 (consecutive) access.
2517 FORNOW: handle only arrays and pointer accesses. */
2519 bool
2520 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2522 unsigned int i;
2523 VEC (data_reference_p, heap) *datarefs;
2524 struct data_reference *dr;
2526 if (vect_print_dump_info (REPORT_DETAILS))
2527 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2529 if (loop_vinfo)
2530 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2531 else
2532 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2534 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2535 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2536 && !vect_analyze_data_ref_access (dr))
2538 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2539 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2541 if (bb_vinfo)
2543 /* Mark the statement as not vectorizable. */
2544 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2545 continue;
2547 else
2548 return false;
2551 return true;
2554 /* Function vect_prune_runtime_alias_test_list.
2556 Prune a list of ddrs to be tested at run-time by versioning for alias.
2557 Return FALSE if the resulting list of ddrs is longer than allowed by
2558 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2560 bool
2561 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2563 VEC (ddr_p, heap) * ddrs =
2564 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2565 unsigned i, j;
2567 if (vect_print_dump_info (REPORT_DETAILS))
2568 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2570 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2572 bool found;
2573 ddr_p ddr_i;
2575 ddr_i = VEC_index (ddr_p, ddrs, i);
2576 found = false;
2578 for (j = 0; j < i; j++)
2580 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2582 if (vect_vfa_range_equal (ddr_i, ddr_j))
2584 if (vect_print_dump_info (REPORT_DR_DETAILS))
2586 fprintf (vect_dump, "found equal ranges ");
2587 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2588 fprintf (vect_dump, ", ");
2589 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2590 fprintf (vect_dump, " and ");
2591 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2592 fprintf (vect_dump, ", ");
2593 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2595 found = true;
2596 break;
2600 if (found)
2602 VEC_ordered_remove (ddr_p, ddrs, i);
2603 continue;
2605 i++;
2608 if (VEC_length (ddr_p, ddrs) >
2609 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2611 if (vect_print_dump_info (REPORT_DR_DETAILS))
2613 fprintf (vect_dump,
2614 "disable versioning for alias - max number of generated "
2615 "checks exceeded.");
2618 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2620 return false;
2623 return true;
2626 /* Check whether a non-affine read in STMT is suitable for a gather load
2627 and, if so, return a builtin decl for that operation. */
2629 tree
2630 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2631 tree *offp, int *scalep)
2633 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2634 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2635 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2636 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2637 tree offtype = NULL_TREE;
2638 tree decl, base, off;
2639 enum machine_mode pmode;
2640 int punsignedp, pvolatilep;
2642 /* The gather builtins need address of the form
2643 loop_invariant + vector * {1, 2, 4, 8}
2644 or
2645 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2646 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2647 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2648 multiplications and additions in it. To get a vector, we need
2649 a single SSA_NAME that will be defined in the loop and will
2650 contain everything that is not loop invariant and that can be
2651 vectorized. The following code attempts to find such a preexisting
2652 SSA_NAME OFF and put the loop invariants into a tree BASE
2653 that can be gimplified before the loop. */
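/* An illustrative case (source assumed): for a read like a[b[i]] with
   4-byte elements the address is &a[0] + 4 * b[i], so the code below
   would keep the loop-invariant &a[0] in BASE, leave the SSA_NAME
   holding b[i] as OFF, and record the element size 4 as the scale.  */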
2654 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2655 &pmode, &punsignedp, &pvolatilep, false);
2656 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2658 if (TREE_CODE (base) == MEM_REF)
2660 if (!integer_zerop (TREE_OPERAND (base, 1)))
2662 if (off == NULL_TREE)
2664 double_int moff = mem_ref_offset (base);
2665 off = double_int_to_tree (sizetype, moff);
2667 else
2668 off = size_binop (PLUS_EXPR, off,
2669 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2671 base = TREE_OPERAND (base, 0);
2673 else
2674 base = build_fold_addr_expr (base);
2676 if (off == NULL_TREE)
2677 off = size_zero_node;
2679 /* If BASE is not loop invariant, then either OFF is 0, in which case we
2680 start with just the constant offset in the loop-invariant BASE and
2681 continue with BASE as OFF, or else we give up.
2682 We could handle the latter case by gimplifying the addition of BASE + OFF
2683 into some SSA_NAME and using that as OFF, but for now punt. */
2684 if (!expr_invariant_in_loop_p (loop, base))
2686 if (!integer_zerop (off))
2687 return NULL_TREE;
2688 off = base;
2689 base = size_int (pbitpos / BITS_PER_UNIT);
2691 /* Otherwise put base + constant offset into the loop invariant BASE
2692 and continue with OFF. */
2693 else
2695 base = fold_convert (sizetype, base);
2696 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2699 /* OFF at this point may be either an SSA_NAME or some tree expression
2700 from get_inner_reference. Try to peel off loop invariants from it
2701 into BASE as long as possible. */
2702 STRIP_NOPS (off);
2703 while (offtype == NULL_TREE)
2705 enum tree_code code;
2706 tree op0, op1, add = NULL_TREE;
2708 if (TREE_CODE (off) == SSA_NAME)
2710 gimple def_stmt = SSA_NAME_DEF_STMT (off);
2712 if (expr_invariant_in_loop_p (loop, off))
2713 return NULL_TREE;
2715 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2716 break;
2718 op0 = gimple_assign_rhs1 (def_stmt);
2719 code = gimple_assign_rhs_code (def_stmt);
2720 op1 = gimple_assign_rhs2 (def_stmt);
2722 else
2724 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2725 return NULL_TREE;
2726 code = TREE_CODE (off);
2727 extract_ops_from_tree (off, &code, &op0, &op1);
2729 switch (code)
2731 case POINTER_PLUS_EXPR:
2732 case PLUS_EXPR:
2733 if (expr_invariant_in_loop_p (loop, op0))
2735 add = op0;
2736 off = op1;
2737 do_add:
2738 add = fold_convert (sizetype, add);
2739 if (scale != 1)
2740 add = size_binop (MULT_EXPR, add, size_int (scale));
2741 base = size_binop (PLUS_EXPR, base, add);
2742 continue;
2744 if (expr_invariant_in_loop_p (loop, op1))
2746 add = op1;
2747 off = op0;
2748 goto do_add;
2750 break;
2751 case MINUS_EXPR:
2752 if (expr_invariant_in_loop_p (loop, op1))
2754 add = fold_convert (sizetype, op1);
2755 add = size_binop (MINUS_EXPR, size_zero_node, add);
2756 off = op0;
2757 goto do_add;
2759 break;
2760 case MULT_EXPR:
2761 if (scale == 1 && host_integerp (op1, 0))
2763 scale = tree_low_cst (op1, 0);
2764 off = op0;
2765 continue;
2767 break;
2768 case SSA_NAME:
2769 off = op0;
2770 continue;
2771 CASE_CONVERT:
2772 if (!POINTER_TYPE_P (TREE_TYPE (op0))
2773 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2774 break;
2775 if (TYPE_PRECISION (TREE_TYPE (op0))
2776 == TYPE_PRECISION (TREE_TYPE (off)))
2778 off = op0;
2779 continue;
2781 if (TYPE_PRECISION (TREE_TYPE (op0))
2782 < TYPE_PRECISION (TREE_TYPE (off)))
2784 off = op0;
2785 offtype = TREE_TYPE (off);
2786 STRIP_NOPS (off);
2787 continue;
2789 break;
2790 default:
2791 break;
2793 break;
2796 /* If at the end OFF still isn't an SSA_NAME or isn't
2797 defined in the loop, punt. */
2798 if (TREE_CODE (off) != SSA_NAME
2799 || expr_invariant_in_loop_p (loop, off))
2800 return NULL_TREE;
2802 if (offtype == NULL_TREE)
2803 offtype = TREE_TYPE (off);
2805 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2806 offtype, scale);
2807 if (decl == NULL_TREE)
2808 return NULL_TREE;
2810 if (basep)
2811 *basep = base;
2812 if (offp)
2813 *offp = off;
2814 if (scalep)
2815 *scalep = scale;
2816 return decl;
2819 /* Check whether a non-affine load in STMT (being in the loop referred to
2820 in LOOP_VINFO) is suitable for handling as a strided load. That is the
2821 case if its address is a simple induction variable. If so, return the
2822 base of that induction variable in *BASEP and the (loop-invariant) step
2823 in *STEPP, each only if the corresponding pointer is non-null.
2825 This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
2826 base pointer) only. */
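/* Illustration (loop assumed): for a load a[k] where K advances by a
   loop-invariant S each iteration, the index is a simple induction
   variable, so *BASEP would receive the initial value of K and *STEPP
   the step S (in index units for the ARRAY_REF case).  */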
2828 bool
2829 vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2830 tree *stepp)
2832 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2833 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2834 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2835 tree base, off;
2836 affine_iv iv;
2838 if (!DR_IS_READ (dr))
2839 return false;
2841 base = DR_REF (dr);
2843 if (TREE_CODE (base) == ARRAY_REF)
2845 off = TREE_OPERAND (base, 1);
2846 base = TREE_OPERAND (base, 0);
2848 else if (TREE_CODE (base) == MEM_REF)
2850 off = TREE_OPERAND (base, 0);
2851 base = TREE_OPERAND (base, 1);
2853 else
2854 return false;
2856 if (TREE_CODE (off) != SSA_NAME)
2857 return false;
2859 if (!expr_invariant_in_loop_p (loop, base)
2860 || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
2861 return false;
2863 if (basep)
2864 *basep = iv.base;
2865 if (stepp)
2866 *stepp = iv.step;
2867 return true;
2870 /* Function vect_analyze_data_refs.
2872 Find all the data references in the loop or basic block.
2874 The general structure of the analysis of data refs in the vectorizer is as
2875 follows:
2876 1- vect_analyze_data_refs(loop/bb): call
2877 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2878 in the loop/bb and their dependences.
2879 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2880 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2881 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2885 bool
2886 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2887 bb_vec_info bb_vinfo,
2888 int *min_vf)
2890 struct loop *loop = NULL;
2891 basic_block bb = NULL;
2892 unsigned int i;
2893 VEC (data_reference_p, heap) *datarefs;
2894 struct data_reference *dr;
2895 tree scalar_type;
2896 bool res, stop_bb_analysis = false;
2898 if (vect_print_dump_info (REPORT_DETAILS))
2899 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2901 if (loop_vinfo)
2903 loop = LOOP_VINFO_LOOP (loop_vinfo);
2904 res = compute_data_dependences_for_loop
2905 (loop, true,
2906 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2907 &LOOP_VINFO_DATAREFS (loop_vinfo),
2908 &LOOP_VINFO_DDRS (loop_vinfo));
2910 if (!res)
2912 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2913 fprintf (vect_dump, "not vectorized: loop contains function calls"
2914 " or data references that cannot be analyzed");
2915 return false;
2918 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2920 else
2922 gimple_stmt_iterator gsi;
2924 bb = BB_VINFO_BB (bb_vinfo);
2925 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2927 gimple stmt = gsi_stmt (gsi);
2928 if (!find_data_references_in_stmt (NULL, stmt,
2929 &BB_VINFO_DATAREFS (bb_vinfo)))
2931 /* Mark the rest of the basic-block as unvectorizable. */
2932 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2934 stmt = gsi_stmt (gsi);
2935 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
2937 break;
2940 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
2941 &BB_VINFO_DDRS (bb_vinfo), NULL, true))
2943 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2944 fprintf (vect_dump, "not vectorized: basic block contains function"
2945 " calls or data references that cannot be analyzed");
2946 return false;
2949 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2952 /* Go through the data-refs, check that the analysis succeeded. Update
2953 pointer from stmt_vec_info struct to DR and vectype. */
2955 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2957 gimple stmt;
2958 stmt_vec_info stmt_info;
2959 tree base, offset, init;
2960 bool gather = false;
2961 int vf;
2963 if (!dr || !DR_REF (dr))
2965 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2966 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2968 return false;
2971 stmt = DR_STMT (dr);
2972 stmt_info = vinfo_for_stmt (stmt);
2974 if (stop_bb_analysis)
2976 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2977 continue;
2980 /* Check that analysis of the data-ref succeeded. */
2981 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2982 || !DR_STEP (dr))
2984 /* If the target supports vector gather loads, see if they can
2985 be used. */
2986 if (loop_vinfo
2987 && DR_IS_READ (dr)
2988 && !TREE_THIS_VOLATILE (DR_REF (dr))
2989 && targetm.vectorize.builtin_gather != NULL
2990 && !nested_in_vect_loop_p (loop, stmt))
2992 struct data_reference *newdr
2993 = create_data_ref (NULL, loop_containing_stmt (stmt),
2994 DR_REF (dr), stmt, true);
2995 gcc_assert (newdr != NULL && DR_REF (newdr));
2996 if (DR_BASE_ADDRESS (newdr)
2997 && DR_OFFSET (newdr)
2998 && DR_INIT (newdr)
2999 && DR_STEP (newdr)
3000 && integer_zerop (DR_STEP (newdr)))
3002 dr = newdr;
3003 gather = true;
3005 else
3006 free_data_ref (newdr);
3009 if (!gather)
3011 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3013 fprintf (vect_dump, "not vectorized: data ref analysis "
3014 "failed ");
3015 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3018 if (bb_vinfo)
3020 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3021 stop_bb_analysis = true;
3022 continue;
3025 return false;
3029 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3031 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3032 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3033 "constant");
3035 if (bb_vinfo)
3037 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3038 stop_bb_analysis = true;
3039 continue;
3042 if (gather)
3043 free_data_ref (dr);
3044 return false;
3047 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3049 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3051 fprintf (vect_dump, "not vectorized: volatile type ");
3052 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3055 if (bb_vinfo)
3057 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3058 stop_bb_analysis = true;
3059 continue;
3062 return false;
3065 if (stmt_can_throw_internal (stmt))
3067 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3069 fprintf (vect_dump, "not vectorized: statement can throw an "
3070 "exception ");
3071 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3074 if (bb_vinfo)
3076 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3077 stop_bb_analysis = true;
3078 continue;
3081 if (gather)
3082 free_data_ref (dr);
3083 return false;
3086 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3087 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3089 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3091 fprintf (vect_dump, "not vectorized: statement is bitfield "
3092 "access ");
3093 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3096 if (bb_vinfo)
3098 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3099 stop_bb_analysis = true;
3100 continue;
3103 if (gather)
3104 free_data_ref (dr);
3105 return false;
3108 base = unshare_expr (DR_BASE_ADDRESS (dr));
3109 offset = unshare_expr (DR_OFFSET (dr));
3110 init = unshare_expr (DR_INIT (dr));
3112 if (is_gimple_call (stmt))
3114 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3116 fprintf (vect_dump, "not vectorized: dr in a call ");
3117 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3120 if (bb_vinfo)
3122 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3123 stop_bb_analysis = true;
3124 continue;
3127 if (gather)
3128 free_data_ref (dr);
3129 return false;
3132 /* Update DR field in stmt_vec_info struct. */
3134 /* If the dataref is in an inner-loop of the loop that is considered
3135 for vectorization, we also want to analyze the access relative to
3136 the outer-loop (DR contains information only relative to the
3137 inner-most enclosing loop). We do that by building a reference to the
3138 first location accessed by the inner-loop, and analyzing it relative to
3139 the outer-loop. */
3140 if (loop && nested_in_vect_loop_p (loop, stmt))
3142 tree outer_step, outer_base, outer_init;
3143 HOST_WIDE_INT pbitsize, pbitpos;
3144 tree poffset;
3145 enum machine_mode pmode;
3146 int punsignedp, pvolatilep;
3147 affine_iv base_iv, offset_iv;
3148 tree dinit;
3150 /* Build a reference to the first location accessed by the
3151 inner-loop: *(BASE+INIT). (The first location is actually
3152 BASE+INIT+OFFSET, but we add OFFSET separately later). */
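/* E.g. (borrowing the in[i+j] example with short elements used further
   down in this file): relative to the outer i-loop the first inner-loop
   location is *(in + i*2B), and the analysis below derives the
   outer-loop base, offset and step from that reference.  */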
3153 tree inner_base = build_fold_indirect_ref
3154 (fold_build_pointer_plus (base, init));
3156 if (vect_print_dump_info (REPORT_DETAILS))
3158 fprintf (vect_dump, "analyze in outer-loop: ");
3159 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
3162 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3163 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3164 gcc_assert (outer_base != NULL_TREE);
3166 if (pbitpos % BITS_PER_UNIT != 0)
3168 if (vect_print_dump_info (REPORT_DETAILS))
3169 fprintf (vect_dump, "failed: bit offset alignment.\n");
3170 return false;
3173 outer_base = build_fold_addr_expr (outer_base);
3174 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3175 &base_iv, false))
3177 if (vect_print_dump_info (REPORT_DETAILS))
3178 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
3179 return false;
3182 if (offset)
3184 if (poffset)
3185 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
3186 poffset);
3187 else
3188 poffset = offset;
3191 if (!poffset)
3193 offset_iv.base = ssize_int (0);
3194 offset_iv.step = ssize_int (0);
3196 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3197 &offset_iv, false))
3199 if (vect_print_dump_info (REPORT_DETAILS))
3200 fprintf (vect_dump, "evolution of offset is not affine.\n");
3201 return false;
3204 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3205 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3206 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3207 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3208 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3210 outer_step = size_binop (PLUS_EXPR,
3211 fold_convert (ssizetype, base_iv.step),
3212 fold_convert (ssizetype, offset_iv.step));
3214 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3215 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3216 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3217 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3218 STMT_VINFO_DR_OFFSET (stmt_info) =
3219 fold_convert (ssizetype, offset_iv.base);
3220 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3221 size_int (highest_pow2_factor (offset_iv.base));
3223 if (vect_print_dump_info (REPORT_DETAILS))
3225 fprintf (vect_dump, "\touter base_address: ");
3226 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3227 fprintf (vect_dump, "\n\touter offset from base address: ");
3228 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3229 fprintf (vect_dump, "\n\touter constant offset from base address: ");
3230 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3231 fprintf (vect_dump, "\n\touter step: ");
3232 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3233 fprintf (vect_dump, "\n\touter aligned to: ");
3234 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3238 if (STMT_VINFO_DATA_REF (stmt_info))
3240 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3242 fprintf (vect_dump,
3243 "not vectorized: more than one data ref in stmt: ");
3244 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3247 if (bb_vinfo)
3249 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3250 stop_bb_analysis = true;
3251 continue;
3254 if (gather)
3255 free_data_ref (dr);
3256 return false;
3259 STMT_VINFO_DATA_REF (stmt_info) = dr;
3261 /* Set vectype for STMT. */
3262 scalar_type = TREE_TYPE (DR_REF (dr));
3263 STMT_VINFO_VECTYPE (stmt_info) =
3264 get_vectype_for_scalar_type (scalar_type);
3265 if (!STMT_VINFO_VECTYPE (stmt_info))
3267 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3269 fprintf (vect_dump,
3270 "not vectorized: no vectype for stmt: ");
3271 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3272 fprintf (vect_dump, " scalar_type: ");
3273 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3276 if (bb_vinfo)
3278 /* Mark the statement as not vectorizable. */
3279 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3280 stop_bb_analysis = true;
3281 continue;
3284 if (gather)
3286 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3287 free_data_ref (dr);
3289 return false;
3292 /* Adjust the minimal vectorization factor according to the
3293 vector type. */
3294 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3295 if (vf > *min_vf)
3296 *min_vf = vf;
3298 if (gather)
3300 unsigned int j, k, n;
3301 struct data_reference *olddr
3302 = VEC_index (data_reference_p, datarefs, i);
3303 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3304 struct data_dependence_relation *ddr, *newddr;
3305 bool bad = false;
3306 tree off;
3307 VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3309 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3310 if (gather
3311 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3312 gather = false;
3313 if (!gather)
3315 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3316 free_data_ref (dr);
3317 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3319 fprintf (vect_dump,
3320 "not vectorized: not suitable for gather load ");
3321 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3323 return false;
3326 n = VEC_length (data_reference_p, datarefs) - 1;
3327 for (j = 0, k = i - 1; j < i; j++)
3329 ddr = VEC_index (ddr_p, ddrs, k);
3330 gcc_assert (DDR_B (ddr) == olddr);
3331 newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3332 nest);
3333 VEC_replace (ddr_p, ddrs, k, newddr);
3334 free_dependence_relation (ddr);
3335 if (!bad
3336 && DR_IS_WRITE (DDR_A (newddr))
3337 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3338 bad = true;
3339 k += --n;
3342 k++;
3343 n = k + VEC_length (data_reference_p, datarefs) - i - 1;
3344 for (; k < n; k++)
3346 ddr = VEC_index (ddr_p, ddrs, k);
3347 gcc_assert (DDR_A (ddr) == olddr);
3348 newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3349 nest);
3350 VEC_replace (ddr_p, ddrs, k, newddr);
3351 free_dependence_relation (ddr);
3352 if (!bad
3353 && DR_IS_WRITE (DDR_B (newddr))
3354 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3355 bad = true;
3358 k = VEC_length (ddr_p, ddrs)
3359 - VEC_length (data_reference_p, datarefs) + i;
3360 ddr = VEC_index (ddr_p, ddrs, k);
3361 gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3362 newddr = initialize_data_dependence_relation (dr, dr, nest);
3363 VEC_replace (ddr_p, ddrs, k, newddr);
3364 free_dependence_relation (ddr);
3365 VEC_replace (data_reference_p, datarefs, i, dr);
3367 if (bad)
3369 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3371 fprintf (vect_dump,
3372 "not vectorized: data dependence conflict"
3373 " prevents gather load");
3374 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3376 return false;
3379 STMT_VINFO_GATHER_P (stmt_info) = true;
3381 else if (loop_vinfo
3382 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3384 bool strided_load = false;
3385 if (!nested_in_vect_loop_p (loop, stmt))
3386 strided_load
3387 = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
3388 if (!strided_load)
3390 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3392 fprintf (vect_dump,
3393 "not vectorized: not suitable for strided load ");
3394 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3396 return false;
3398 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
3402 return true;
3406 /* Function vect_get_new_vect_var.
3408 Returns a name for a new variable. The current naming scheme prepends
3409 the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
3410 VAR_KIND) to NAME if NAME is provided, and otherwise uses the prefix
3411 alone as the name of the new variable. */
3413 tree
3414 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3416 const char *prefix;
3417 tree new_vect_var;
3419 switch (var_kind)
3421 case vect_simple_var:
3422 prefix = "vect_";
3423 break;
3424 case vect_scalar_var:
3425 prefix = "stmp_";
3426 break;
3427 case vect_pointer_var:
3428 prefix = "vect_p";
3429 break;
3430 default:
3431 gcc_unreachable ();
3434 if (name)
3436 char* tmp = concat (prefix, name, NULL);
3437 new_vect_var = create_tmp_reg (type, tmp);
3438 free (tmp);
3440 else
3441 new_vect_var = create_tmp_reg (type, prefix);
3443 return new_vect_var;
3447 /* Function vect_create_addr_base_for_vector_ref.
3449 Create an expression that computes the address of the first memory location
3450 that will be accessed for a data reference.
3452 Input:
3453 STMT: The statement containing the data reference.
3454 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3455 OFFSET: Optional. If supplied, it is added to the initial address.
3456 LOOP: Specify relative to which loop-nest the address should be computed.
3457 For example, when the dataref is in an inner-loop nested in an
3458 outer-loop that is now being vectorized, LOOP can be either the
3459 outer-loop, or the inner-loop. The first memory location accessed
3460 by the following dataref ('in' points to short):
3462 for (i=0; i<N; i++)
3463 for (j=0; j<M; j++)
3464 s += in[i+j]
3466 is as follows:
3467 if LOOP=i_loop: &in (relative to i_loop)
3468 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3470 Output:
3471 1. Return an SSA_NAME whose value is the address of the memory location of
3472 the first vector of the data reference.
3473 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3474 these statement(s) which define the returned SSA_NAME.
3476 FORNOW: We are only handling array accesses with step 1. */
3478 tree
3479 vect_create_addr_base_for_vector_ref (gimple stmt,
3480 gimple_seq *new_stmt_list,
3481 tree offset,
3482 struct loop *loop)
3484 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3485 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3486 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3487 tree base_name;
3488 tree data_ref_base_var;
3489 tree vec_stmt;
3490 tree addr_base, addr_expr;
3491 tree dest;
3492 gimple_seq seq = NULL;
3493 tree base_offset = unshare_expr (DR_OFFSET (dr));
3494 tree init = unshare_expr (DR_INIT (dr));
3495 tree vect_ptr_type;
3496 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3497 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3498 tree base;
3500 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3502 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3504 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3506 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3507 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3508 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3511 if (loop_vinfo)
3512 base_name = build_fold_indirect_ref (data_ref_base);
3513 else
3515 base_offset = ssize_int (0);
3516 init = ssize_int (0);
3517 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
3520 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3521 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3522 data_ref_base_var);
3523 gimple_seq_add_seq (new_stmt_list, seq);
3525 /* Create base_offset */
3526 base_offset = size_binop (PLUS_EXPR,
3527 fold_convert (sizetype, base_offset),
3528 fold_convert (sizetype, init));
3529 dest = create_tmp_var (sizetype, "base_off");
3530 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3531 gimple_seq_add_seq (new_stmt_list, seq);
3533 if (offset)
3535 tree tmp = create_tmp_var (sizetype, "offset");
3537 offset = fold_build2 (MULT_EXPR, sizetype,
3538 fold_convert (sizetype, offset), step);
3539 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3540 base_offset, offset);
3541 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3542 gimple_seq_add_seq (new_stmt_list, seq);
3545 /* base + base_offset */
3546 if (loop_vinfo)
3547 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3548 else
3550 addr_base = build1 (ADDR_EXPR,
3551 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3552 unshare_expr (DR_REF (dr)));
3555 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3556 base = get_base_address (DR_REF (dr));
3557 if (base
3558 && TREE_CODE (base) == MEM_REF)
3559 vect_ptr_type
3560 = build_qualified_type (vect_ptr_type,
3561 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3563 vec_stmt = fold_convert (vect_ptr_type, addr_base);
3564 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3565 get_name (base_name));
3566 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3567 gimple_seq_add_seq (new_stmt_list, seq);
3569 if (DR_PTR_INFO (dr)
3570 && TREE_CODE (vec_stmt) == SSA_NAME)
3572 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3573 if (offset)
3574 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (vec_stmt));
3577 if (vect_print_dump_info (REPORT_DETAILS))
3579 fprintf (vect_dump, "created ");
3580 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
3583 return vec_stmt;
3587 /* Function vect_create_data_ref_ptr.
3589 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3590 location accessed in the loop by STMT, along with the def-use update
3591 chain to appropriately advance the pointer through the loop iterations.
3592 Also set aliasing information for the pointer. This pointer is used by
3593 the callers to this function to create a memory reference expression for
3594 vector load/store access.
3596 Input:
3597 1. STMT: a stmt that references memory. Expected to be of the form
3598 GIMPLE_ASSIGN <name, data-ref> or
3599 GIMPLE_ASSIGN <data-ref, name>.
3600 2. AGGR_TYPE: the type of the reference, which should be either a vector
3601 or an array.
3602 3. AT_LOOP: the loop where the vector memref is to be created.
3603 4. OFFSET (optional): an offset to be added to the initial address accessed
3604 by the data-ref in STMT.
3605 5. BSI: location where the new stmts are to be placed if there is no loop
3606 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3607 pointing to the initial address.
3609 Output:
3610 1. Declare a new ptr to vector_type, and have it point to the base of the
3611 data reference (the initial address accessed by the data reference).
3612 For example, for vector of type V8HI, the following code is generated:
3614 v8hi *ap;
3615 ap = (v8hi *)initial_address;
3617 if OFFSET is not supplied:
3618 initial_address = &a[init];
3619 if OFFSET is supplied:
3620 initial_address = &a[init + OFFSET];
3622 Return the initial_address in INITIAL_ADDRESS.
3624 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3625 update the pointer in each iteration of the loop.
3627 Return the increment stmt that updates the pointer in PTR_INCR.
3629 3. Set INV_P to true if the access pattern of the data reference in the
3630 vectorized loop is invariant. Set it to false otherwise.
3632 4. Return the pointer. */
3634 tree
3635 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3636 tree offset, tree *initial_address,
3637 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3638 bool only_init, bool *inv_p)
3640 tree base_name;
3641 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3642 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3643 struct loop *loop = NULL;
3644 bool nested_in_vect_loop = false;
3645 struct loop *containing_loop = NULL;
3646 tree aggr_ptr_type;
3647 tree aggr_ptr;
3648 tree new_temp;
3649 gimple vec_stmt;
3650 gimple_seq new_stmt_list = NULL;
3651 edge pe = NULL;
3652 basic_block new_bb;
3653 tree aggr_ptr_init;
3654 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3655 tree aptr;
3656 gimple_stmt_iterator incr_gsi;
3657 bool insert_after;
3658 bool negative;
3659 tree indx_before_incr, indx_after_incr;
3660 gimple incr;
3661 tree step;
3662 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3663 tree base;
3665 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3666 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3668 if (loop_vinfo)
3670 loop = LOOP_VINFO_LOOP (loop_vinfo);
3671 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3672 containing_loop = (gimple_bb (stmt))->loop_father;
3673 pe = loop_preheader_edge (loop);
3675 else
3677 gcc_assert (bb_vinfo);
3678 only_init = true;
3679 *ptr_incr = NULL;
3682 /* Check the step (evolution) of the load in LOOP, and record
3683 whether it's invariant. */
3684 if (nested_in_vect_loop)
3685 step = STMT_VINFO_DR_STEP (stmt_info);
3686 else
3687 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3689 if (tree_int_cst_compare (step, size_zero_node) == 0)
3690 *inv_p = true;
3691 else
3692 *inv_p = false;
3693 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3695 /* Create an expression for the first address accessed by this load
3696 in LOOP. */
3697 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3699 if (vect_print_dump_info (REPORT_DETAILS))
3701 tree data_ref_base = base_name;
3702 fprintf (vect_dump, "create %s-pointer variable to type: ",
3703 tree_code_name[(int) TREE_CODE (aggr_type)]);
3704 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3705 if (TREE_CODE (data_ref_base) == VAR_DECL
3706 || TREE_CODE (data_ref_base) == ARRAY_REF)
3707 fprintf (vect_dump, " vectorizing an array ref: ");
3708 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3709 fprintf (vect_dump, " vectorizing a record based array ref: ");
3710 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3711 fprintf (vect_dump, " vectorizing a pointer ref: ");
3712 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3715 /* (1) Create the new aggregate-pointer variable. */
3716 aggr_ptr_type = build_pointer_type (aggr_type);
3717 base = get_base_address (DR_REF (dr));
3718 if (base
3719 && TREE_CODE (base) == MEM_REF)
3720 aggr_ptr_type
3721 = build_qualified_type (aggr_ptr_type,
3722 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3723 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3724 get_name (base_name));
3726 /* Vector and array types inherit the alias set of their component
3727 type by default so we need to use a ref-all pointer if the data
3728 reference does not conflict with the created aggregated data
3729 reference because it is not addressable. */
3730 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3731 get_alias_set (DR_REF (dr))))
3733 aggr_ptr_type
3734 = build_pointer_type_for_mode (aggr_type,
3735 TYPE_MODE (aggr_ptr_type), true);
3736 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3737 get_name (base_name));
3740 /* Likewise for any of the data references in the stmt group. */
3741 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3743 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3746 tree lhs = gimple_assign_lhs (orig_stmt);
3747 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3748 get_alias_set (lhs)))
3750 aggr_ptr_type
3751 = build_pointer_type_for_mode (aggr_type,
3752 TYPE_MODE (aggr_ptr_type), true);
3753 aggr_ptr
3754 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3755 get_name (base_name));
3756 break;
3759 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3761 while (orig_stmt);
3764 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3765 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3766 def-use update cycles for the pointer: one relative to the outer-loop
3767 (LOOP), which is what steps (3) and (4) below do. The other is relative
3768 to the inner-loop (which is the inner-most loop containing the dataref),
3769 and this is done by step (5) below.
3771 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3772 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3773 redundant. Steps (3),(4) create the following:
3775 vp0 = &base_addr;
3776 LOOP: vp1 = phi(vp0,vp2)
3779 vp2 = vp1 + step
3780 goto LOOP
3782 If there is an inner-loop nested in loop, then step (5) will also be
3783 applied, and an additional update in the inner-loop will be created:
3785 vp0 = &base_addr;
3786 LOOP: vp1 = phi(vp0,vp2)
3788 inner: vp3 = phi(vp1,vp4)
3789 vp4 = vp3 + inner_step
3790 if () goto inner
3792 vp2 = vp1 + step
3793 if () goto LOOP */
3795 /* (2) Calculate the initial address of the aggregate-pointer, and set
3796 the aggregate-pointer to point to it before the loop. */
3798 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3800 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3801 offset, loop);
3802 if (new_stmt_list)
3804 if (pe)
3806 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3807 gcc_assert (!new_bb);
3809 else
3810 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3813 *initial_address = new_temp;
3815 /* Create: p = (aggr_type *) initial_base */
3816 if (TREE_CODE (new_temp) != SSA_NAME
3817 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3819 vec_stmt = gimple_build_assign (aggr_ptr,
3820 fold_convert (aggr_ptr_type, new_temp));
3821 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3822 /* Copy the points-to information if it exists. */
3823 if (DR_PTR_INFO (dr))
3824 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3825 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3826 if (pe)
3828 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3829 gcc_assert (!new_bb);
3831 else
3832 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3834 else
3835 aggr_ptr_init = new_temp;
3837 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3838 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3839 inner-loop nested in LOOP (during outer-loop vectorization). */
3841 /* No update in loop is required. */
3842 if (only_init && (!loop_vinfo || at_loop == loop))
3843 aptr = aggr_ptr_init;
3844 else
3846 /* The step of the aggregate pointer is the type size. */
3847 tree step = TYPE_SIZE_UNIT (aggr_type);
3848 /* One exception to the above is when the scalar step of the load in
3849 LOOP is zero. In this case the step here is also zero. */
3850 if (*inv_p)
3851 step = size_zero_node;
3852 else if (negative)
3853 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3855 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3857 create_iv (aggr_ptr_init,
3858 fold_convert (aggr_ptr_type, step),
3859 aggr_ptr, loop, &incr_gsi, insert_after,
3860 &indx_before_incr, &indx_after_incr);
3861 incr = gsi_stmt (incr_gsi);
3862 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3864 /* Copy the points-to information if it exists. */
3865 if (DR_PTR_INFO (dr))
3867 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3868 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3870 if (ptr_incr)
3871 *ptr_incr = incr;
3873 aptr = indx_before_incr;
3876 if (!nested_in_vect_loop || only_init)
3877 return aptr;
3880 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3881 nested in LOOP, if it exists. */
3883 gcc_assert (nested_in_vect_loop);
3884 if (!only_init)
3886 standard_iv_increment_position (containing_loop, &incr_gsi,
3887 &insert_after);
3888 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3889 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3890 &indx_after_incr);
3891 incr = gsi_stmt (incr_gsi);
3892 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3894 /* Copy the points-to information if it exists. */
3895 if (DR_PTR_INFO (dr))
3897 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3898 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3900 if (ptr_incr)
3901 *ptr_incr = incr;
3903 return indx_before_incr;
3905 else
3906 gcc_unreachable ();
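/* Illustrative sketch, not part of GCC: a plain scalar analogue of the
   vp0..vp4 pseudo code in the comment above, showing the two pointer
   update cycles created for outer-loop vectorization.  The array size,
   trip counts and step values are made-up examples; the code is kept
   disabled so it does not affect this file.  */
#if 0
#include <stdio.h>

int
main (void)
{
  char base_addr[64];
  long step = 16, inner_step = 4;   /* hypothetical outer and inner steps */
  char *vp1 = base_addr;            /* vp0 = &base_addr;  vp1 = phi (vp0, vp2) */
  int i, j;

  for (i = 0; i < 2; i++)           /* the vectorized loop (LOOP) */
    {
      char *vp3 = vp1;              /* inner:  vp3 = phi (vp1, vp4) */
      for (j = 0; j < 3; j++)
        {
          printf ("inner access at offset %ld\n", (long) (vp3 - base_addr));
          vp3 = vp3 + inner_step;   /* vp4 = vp3 + inner_step */
        }
      vp1 = vp1 + step;             /* vp2 = vp1 + step */
    }
  return 0;
}
#endif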
3910 /* Function bump_vector_ptr
3912 Increment a pointer (to a vector type) by vector-size. If requested,
3913 i.e. if PTR-INCR is given, then also connect the new increment stmt
3914 to the existing def-use update-chain of the pointer, by modifying
3915 the PTR_INCR as illustrated below:
3917 The pointer def-use update-chain before this function:
3918 DATAREF_PTR = phi (p_0, p_2)
3919 ....
3920 PTR_INCR: p_2 = DATAREF_PTR + step
3922 The pointer def-use update-chain after this function:
3923 DATAREF_PTR = phi (p_0, p_2)
3924 ....
3925 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3926 ....
3927 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3929 Input:
3930 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3931 in the loop.
3932 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3933 the loop. The increment amount across iterations is expected
3934 to be vector_size.
3935 BSI - location where the new update stmt is to be placed.
3936 STMT - the original scalar memory-access stmt that is being vectorized.
3937 BUMP - optional. The offset by which to bump the pointer. If not given,
3938 the offset is assumed to be vector_size.
3940 Output: Return NEW_DATAREF_PTR as illustrated above.
3944 tree
3945 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3946 gimple stmt, tree bump)
3948 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3949 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3950 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3951 tree update = TYPE_SIZE_UNIT (vectype);
3952 gimple incr_stmt;
3953 ssa_op_iter iter;
3954 use_operand_p use_p;
3955 tree new_dataref_ptr;
3957 if (bump)
3958 update = bump;
3960 new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
3961 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
3962 dataref_ptr, update);
3963 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3965 /* Copy the points-to information if it exists. */
3966 if (DR_PTR_INFO (dr))
3968 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3969 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
3972 if (!ptr_incr)
3973 return new_dataref_ptr;
3975 /* Update the vector-pointer's cross-iteration increment. */
3976 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3978 tree use = USE_FROM_PTR (use_p);
3980 if (use == dataref_ptr)
3981 SET_USE (use_p, new_dataref_ptr);
3982 else
3983 gcc_assert (tree_int_cst_compare (use, update) == 0);
3986 return new_dataref_ptr;
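/* Illustrative sketch, not part of GCC: the def-use chain rewrite documented
   above, modelled with plain pointer arithmetic for a hypothetical 16-byte
   vector type and the default BUMP of one vector size.  Kept disabled so it
   does not affect this file.  */
#if 0
#include <stdio.h>

int
main (void)
{
  char buf[64];
  long step = 16;                               /* vector size; also the default BUMP */
  char *dataref_ptr = buf;
  char *new_dataref_ptr = dataref_ptr + step;   /* NEW_DATAREF_PTR = DATAREF_PTR + BUMP */
  char *p_2 = new_dataref_ptr + step;           /* PTR_INCR now reads the bumped name:
                                                   p_2 = NEW_DATAREF_PTR + step */
  printf ("bumped by %ld bytes; cross-iteration pointer is at offset %ld\n",
          (long) (new_dataref_ptr - dataref_ptr), (long) (p_2 - buf));
  return 0;
}
#endif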
3990 /* Function vect_create_destination_var.
3992 Create a new temporary of type VECTYPE. */
3994 tree
3995 vect_create_destination_var (tree scalar_dest, tree vectype)
3997 tree vec_dest;
3998 const char *new_name;
3999 tree type;
4000 enum vect_var_kind kind;
4002 kind = vectype ? vect_simple_var : vect_scalar_var;
4003 type = vectype ? vectype : TREE_TYPE (scalar_dest);
4005 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4007 new_name = get_name (scalar_dest);
4008 if (!new_name)
4009 new_name = "var_";
4010 vec_dest = vect_get_new_vect_var (type, kind, new_name);
4012 return vec_dest;
4015 /* Function vect_grouped_store_supported.
4017 Returns TRUE if interleave high and interleave low permutations
4018 are supported, and FALSE otherwise. */
4020 bool
4021 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
4023 enum machine_mode mode = TYPE_MODE (vectype);
4025 /* vect_permute_store_chain requires the group size to be a power of two. */
4026 if (exact_log2 (count) == -1)
4028 if (vect_print_dump_info (REPORT_DETAILS))
4029 fprintf (vect_dump, "the size of the group of accesses"
4030 " is not a power of 2");
4031 return false;
4034 /* Check that the permutation is supported. */
4035 if (VECTOR_MODE_P (mode))
4037 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4038 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4039 for (i = 0; i < nelt / 2; i++)
4041 sel[i * 2] = i;
4042 sel[i * 2 + 1] = i + nelt;
4044 if (can_vec_perm_p (mode, false, sel))
4046 for (i = 0; i < nelt; i++)
4047 sel[i] += nelt / 2;
4048 if (can_vec_perm_p (mode, false, sel))
4049 return true;
4053 if (vect_print_dump_info (REPORT_DETAILS))
4054 fprintf (vect_dump, "interleave op not supported by target.");
4055 return false;
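/* Illustrative sketch, not part of GCC: a standalone program that builds the
   two selector vectors tested above for a hypothetical 8-element vector and
   prints them, so the interleave-high/low masks can be seen explicitly.
   Kept disabled so it does not affect this file.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int nelt = 8, i;
  unsigned char sel[8];

  for (i = 0; i < nelt / 2; i++)
    {
      sel[i * 2] = i;                 /* {0, nelt, 1, nelt+1, ...} */
      sel[i * 2 + 1] = i + nelt;
    }
  printf ("interleave-high mask:");
  for (i = 0; i < nelt; i++)
    printf (" %u", (unsigned) sel[i]);
  printf ("\n");

  for (i = 0; i < nelt; i++)
    sel[i] += nelt / 2;               /* {nelt/2, nelt*3/2, nelt/2+1, ...} */
  printf ("interleave-low mask:");
  for (i = 0; i < nelt; i++)
    printf (" %u", (unsigned) sel[i]);
  printf ("\n");
  return 0;
}
#endif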
4059 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
4060 type VECTYPE. */
4062 bool
4063 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4065 return vect_lanes_optab_supported_p ("vec_store_lanes",
4066 vec_store_lanes_optab,
4067 vectype, count);
4071 /* Function vect_permute_store_chain.
4073 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4074 a power of 2, generate interleave_high/low stmts to reorder the data
4075 correctly for the stores. Return the final references for stores in
4076 RESULT_CHAIN.
4078 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4079 The input is 4 vectors each containing 8 elements. We assign a number to
4080 each element, the input sequence is:
4082 1st vec: 0 1 2 3 4 5 6 7
4083 2nd vec: 8 9 10 11 12 13 14 15
4084 3rd vec: 16 17 18 19 20 21 22 23
4085 4th vec: 24 25 26 27 28 29 30 31
4087 The output sequence should be:
4089 1st vec: 0 8 16 24 1 9 17 25
4090 2nd vec: 2 10 18 26 3 11 19 27
4091 3rd vec: 4 12 20 28 5 13 21 29
4092 4th vec: 6 14 22 30 7 15 23 31
4094 i.e., we interleave the contents of the four vectors in their order.
4096 We use interleave_high/low instructions to create such output. The input of
4097 each interleave_high/low operation is two vectors:
4098 1st vec 2nd vec
4099 0 1 2 3 4 5 6 7
4100 the even elements of the result vector are obtained left-to-right from the
4101 high/low elements of the first vector. The odd elements of the result are
4102 obtained left-to-right from the high/low elements of the second vector.
4103 The output of interleave_high will be: 0 4 1 5
4104 and of interleave_low: 2 6 3 7
4107 The permutation is done in log LENGTH stages. In each stage interleave_high
4108 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4109 where the first argument is taken from the first half of DR_CHAIN and the
4110 second argument from its second half.
4111 In our example,
4113 I1: interleave_high (1st vec, 3rd vec)
4114 I2: interleave_low (1st vec, 3rd vec)
4115 I3: interleave_high (2nd vec, 4th vec)
4116 I4: interleave_low (2nd vec, 4th vec)
4118 The output for the first stage is:
4120 I1: 0 16 1 17 2 18 3 19
4121 I2: 4 20 5 21 6 22 7 23
4122 I3: 8 24 9 25 10 26 11 27
4123 I4: 12 28 13 29 14 30 15 31
4125 The output of the second stage, i.e. the final result is:
4127 I1: 0 8 16 24 1 9 17 25
4128 I2: 2 10 18 26 3 11 19 27
4129 I3: 4 12 20 28 5 13 21 29
4130 I4: 6 14 22 30 7 15 23 31. */
4132 void
4133 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
4134 unsigned int length,
4135 gimple stmt,
4136 gimple_stmt_iterator *gsi,
4137 VEC(tree,heap) **result_chain)
4139 tree vect1, vect2, high, low;
4140 gimple perm_stmt;
4141 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4142 tree perm_mask_low, perm_mask_high;
4143 unsigned int i, n;
4144 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
4145 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4147 *result_chain = VEC_copy (tree, heap, dr_chain);
4149 for (i = 0, n = nelt / 2; i < n; i++)
4151 sel[i * 2] = i;
4152 sel[i * 2 + 1] = i + nelt;
4154 perm_mask_high = vect_gen_perm_mask (vectype, sel);
4155 gcc_assert (perm_mask_high != NULL);
4157 for (i = 0; i < nelt; i++)
4158 sel[i] += nelt / 2;
4159 perm_mask_low = vect_gen_perm_mask (vectype, sel);
4160 gcc_assert (perm_mask_low != NULL);
4162 for (i = 0, n = exact_log2 (length); i < n; i++)
4164 for (j = 0; j < length/2; j++)
4166 vect1 = VEC_index (tree, dr_chain, j);
4167 vect2 = VEC_index (tree, dr_chain, j+length/2);
4169 /* Create interleaving stmt:
4170 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
4171 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
4172 perm_stmt
4173 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high,
4174 vect1, vect2, perm_mask_high);
4175 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4176 VEC_replace (tree, *result_chain, 2*j, high);
4178 /* Create interleaving stmt:
4179 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4180 nelt*3/2+1, ...}> */
4181 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
4182 perm_stmt
4183 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low,
4184 vect1, vect2, perm_mask_low);
4185 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4186 VEC_replace (tree, *result_chain, 2*j+1, low);
4188 dr_chain = VEC_copy (tree, heap, *result_chain);
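/* Illustrative sketch, not part of GCC: a standalone simulation of the
   interleaving network above on element indices, for LENGTH == 4 and a
   hypothetical 8-element vector type.  Running it reproduces the final
   sequence documented in the comment (0 8 16 24 1 9 17 25, ...).  Kept
   disabled so it does not affect this file.  */
#if 0
#include <stdio.h>
#include <string.h>

#define NELT 8
#define LENGTH 4

/* out[i] = sel[i] < NELT ? v1[sel[i]] : v2[sel[i] - NELT], i.e. VEC_PERM_EXPR.  */
static void
vec_perm (const int *v1, const int *v2, const unsigned char *sel, int *out)
{
  int i;
  for (i = 0; i < NELT; i++)
    out[i] = sel[i] < NELT ? v1[sel[i]] : v2[sel[i] - NELT];
}

int
main (void)
{
  int chain[LENGTH][NELT], result[LENGTH][NELT];
  unsigned char sel_high[NELT], sel_low[NELT];
  int i, j, k, stage;

  for (i = 0; i < LENGTH; i++)
    for (j = 0; j < NELT; j++)
      chain[i][j] = i * NELT + j;

  for (i = 0; i < NELT / 2; i++)
    {
      sel_high[i * 2] = i;                       /* {0, nelt, 1, nelt+1, ...} */
      sel_high[i * 2 + 1] = i + NELT;
      sel_low[i * 2] = i + NELT / 2;             /* {nelt/2, nelt*3/2, ...}   */
      sel_low[i * 2 + 1] = i + NELT + NELT / 2;
    }

  for (stage = 0; stage < 2; stage++)            /* log2 (LENGTH) stages */
    {
      for (j = 0; j < LENGTH / 2; j++)
        {
          vec_perm (chain[j], chain[j + LENGTH / 2], sel_high, result[2 * j]);
          vec_perm (chain[j], chain[j + LENGTH / 2], sel_low, result[2 * j + 1]);
        }
      memcpy (chain, result, sizeof chain);
    }

  for (i = 0; i < LENGTH; i++)
    {
      for (k = 0; k < NELT; k++)
        printf (" %2d", chain[i][k]);
      printf ("\n");
    }
  return 0;
}
#endif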
4192 /* Function vect_setup_realignment
4194 This function is called when vectorizing an unaligned load using
4195 the dr_explicit_realign[_optimized] scheme.
4196 This function generates the following code at the loop prolog:
4198 p = initial_addr;
4199 x msq_init = *(floor(p)); # prolog load
4200 realignment_token = call target_builtin;
4201 loop:
4202 x msq = phi (msq_init, ---)
4204 The stmts marked with x are generated only for the case of
4205 dr_explicit_realign_optimized.
4207 The code above sets up a new (vector) pointer, pointing to the first
4208 location accessed by STMT, and a "floor-aligned" load using that pointer.
4209 It also generates code to compute the "realignment-token" (if the relevant
4210 target hook was defined), and creates a phi-node at the loop-header bb
4211 whose arguments are the result of the prolog-load (created by this
4212 function) and the result of a load that takes place in the loop (to be
4213 created by the caller to this function).
4215 For the case of dr_explicit_realign_optimized:
4216 The caller to this function uses the phi-result (msq) to create the
4217 realignment code inside the loop, and sets up the missing phi argument,
4218 as follows:
4219 loop:
4220 msq = phi (msq_init, lsq)
4221 lsq = *(floor(p')); # load in loop
4222 result = realign_load (msq, lsq, realignment_token);
4224 For the case of dr_explicit_realign:
4225 loop:
4226 msq = *(floor(p)); # load in loop
4227 p' = p + (VS-1);
4228 lsq = *(floor(p')); # load in loop
4229 result = realign_load (msq, lsq, realignment_token);
4231 Input:
4232 STMT - (scalar) load stmt to be vectorized. This load accesses
4233 a memory location that may be unaligned.
4234 BSI - place where new code is to be inserted.
4235 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4236 is used.
4238 Output:
4239 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4240 target hook, if defined.
4241 Return value - the result of the loop-header phi node. */
4243 tree
4244 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4245 tree *realignment_token,
4246 enum dr_alignment_support alignment_support_scheme,
4247 tree init_addr,
4248 struct loop **at_loop)
4250 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4251 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4252 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4253 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4254 struct loop *loop = NULL;
4255 edge pe = NULL;
4256 tree scalar_dest = gimple_assign_lhs (stmt);
4257 tree vec_dest;
4258 gimple inc;
4259 tree ptr;
4260 tree data_ref;
4261 gimple new_stmt;
4262 basic_block new_bb;
4263 tree msq_init = NULL_TREE;
4264 tree new_temp;
4265 gimple phi_stmt;
4266 tree msq = NULL_TREE;
4267 gimple_seq stmts = NULL;
4268 bool inv_p;
4269 bool compute_in_loop = false;
4270 bool nested_in_vect_loop = false;
4271 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4272 struct loop *loop_for_initial_load = NULL;
4274 if (loop_vinfo)
4276 loop = LOOP_VINFO_LOOP (loop_vinfo);
4277 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4280 gcc_assert (alignment_support_scheme == dr_explicit_realign
4281 || alignment_support_scheme == dr_explicit_realign_optimized);
4283 /* We need to generate three things:
4284 1. the misalignment computation
4285 2. the extra vector load (for the optimized realignment scheme).
4286 3. the phi node for the two vectors from which the realignment is
4287 done (for the optimized realignment scheme). */
4289 /* 1. Determine where to generate the misalignment computation.
4291 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4292 calculation will be generated by this function, outside the loop (in the
4293 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4294 caller, inside the loop.
4296 Background: If the misalignment remains fixed throughout the iterations of
4297 the loop, then both realignment schemes are applicable, and also the
4298 misalignment computation can be done outside LOOP. This is because we are
4299 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4300 are a multiple of VS (the Vector Size), and therefore the misalignment in
4301 different vectorized LOOP iterations is always the same.
4302 The problem arises only if the memory access is in an inner-loop nested
4303 inside LOOP, which is now being vectorized using outer-loop vectorization.
4304 This is the only case when the misalignment of the memory access may not
4305 remain fixed throughout the iterations of the inner-loop (as explained in
4306 detail in vect_supportable_dr_alignment). In this case, not only is the
4307 optimized realignment scheme not applicable, but also the misalignment
4308 computation (and generation of the realignment token that is passed to
4309 REALIGN_LOAD) have to be done inside the loop.
4311 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4312 or not, which in turn determines if the misalignment is computed inside
4313 the inner-loop, or outside LOOP. */
4315 if (init_addr != NULL_TREE || !loop_vinfo)
4317 compute_in_loop = true;
4318 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4322 /* 2. Determine where to generate the extra vector load.
4324 For the optimized realignment scheme, instead of generating two vector
4325 loads in each iteration, we generate a single extra vector load in the
4326 preheader of the loop, and in each iteration reuse the result of the
4327 vector load from the previous iteration. In case the memory access is in
4328 an inner-loop nested inside LOOP, which is now being vectorized using
4329 outer-loop vectorization, we need to determine whether this initial vector
4330 load should be generated at the preheader of the inner-loop, or can be
4331 generated at the preheader of LOOP. If the memory access has no evolution
4332 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4333 to be generated inside LOOP (in the preheader of the inner-loop). */
4335 if (nested_in_vect_loop)
4337 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4338 bool invariant_in_outerloop =
4339 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4340 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4342 else
4343 loop_for_initial_load = loop;
4344 if (at_loop)
4345 *at_loop = loop_for_initial_load;
4347 if (loop_for_initial_load)
4348 pe = loop_preheader_edge (loop_for_initial_load);
4350 /* 3. For the case of the optimized realignment, create the first vector
4351 load at the loop preheader. */
4353 if (alignment_support_scheme == dr_explicit_realign_optimized)
4355 /* Create msq_init = *(floor(p1)) in the loop preheader */
4357 gcc_assert (!compute_in_loop);
4358 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4359 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4360 NULL_TREE, &init_addr, NULL, &inc,
4361 true, &inv_p);
4362 new_temp = copy_ssa_name (ptr, NULL);
4363 new_stmt = gimple_build_assign_with_ops
4364 (BIT_AND_EXPR, new_temp, ptr,
4365 build_int_cst (TREE_TYPE (ptr),
4366 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4367 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4368 gcc_assert (!new_bb);
4369 data_ref
4370 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4371 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4372 new_stmt = gimple_build_assign (vec_dest, data_ref);
4373 new_temp = make_ssa_name (vec_dest, new_stmt);
4374 gimple_assign_set_lhs (new_stmt, new_temp);
4375 if (pe)
4377 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4378 gcc_assert (!new_bb);
4380 else
4381 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4383 msq_init = gimple_assign_lhs (new_stmt);
4386 /* 4. Create realignment token using a target builtin, if available.
4387 It is done either inside the containing loop, or before LOOP (as
4388 determined above). */
4390 if (targetm.vectorize.builtin_mask_for_load)
4392 tree builtin_decl;
4394 /* Compute INIT_ADDR - the initial address accessed by this memref. */
4395 if (!init_addr)
4397 /* Generate the INIT_ADDR computation outside LOOP. */
4398 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4399 NULL_TREE, loop);
4400 if (loop)
4402 pe = loop_preheader_edge (loop);
4403 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4404 gcc_assert (!new_bb);
4406 else
4407 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4410 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4411 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4412 vec_dest =
4413 vect_create_destination_var (scalar_dest,
4414 gimple_call_return_type (new_stmt));
4415 new_temp = make_ssa_name (vec_dest, new_stmt);
4416 gimple_call_set_lhs (new_stmt, new_temp);
4418 if (compute_in_loop)
4419 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4420 else
4422 /* Generate the misalignment computation outside LOOP. */
4423 pe = loop_preheader_edge (loop);
4424 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4425 gcc_assert (!new_bb);
4428 *realignment_token = gimple_call_lhs (new_stmt);
4430 /* The result of the CALL_EXPR to this builtin is determined from
4431 the value of the parameter and no global variables are touched,
4432 which makes the builtin a "const" function. Requiring the
4433 builtin to have the "const" attribute makes it unnecessary
4434 to call mark_call_clobbered. */
4435 gcc_assert (TREE_READONLY (builtin_decl));
4438 if (alignment_support_scheme == dr_explicit_realign)
4439 return msq;
4441 gcc_assert (!compute_in_loop);
4442 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4445 /* 5. Create msq = phi <msq_init, lsq> in loop */
4447 pe = loop_preheader_edge (containing_loop);
4448 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4449 msq = make_ssa_name (vec_dest, NULL);
4450 phi_stmt = create_phi_node (msq, containing_loop->header);
4451 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4453 return msq;
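/* Illustrative sketch, not part of GCC: a byte-level scalar model of the
   explicit realignment scheme set up above, assuming a 16-byte vector and
   modelling the realignment token simply as the misalignment offset (real
   targets encode it as a permutation mask produced by
   builtin_mask_for_load).  Kept disabled so it does not affect this file.  */
#if 0
#include <stdint.h>
#include <string.h>

#define VS 16

/* Compute the VS bytes at the possibly unaligned address P using only the
   two enclosing aligned loads, as REALIGN_LOAD <msq, lsq, rt> does.  */
static void
model_realign_load (const unsigned char *p, unsigned char result[VS])
{
  uintptr_t addr = (uintptr_t) p;
  const unsigned char *floor_p
    = (const unsigned char *) (addr & ~(uintptr_t) (VS - 1));
  unsigned int rt = (unsigned int) (addr & (VS - 1));  /* realignment token */
  unsigned char both[2 * VS];

  memcpy (both, floor_p, VS);           /* msq = *(floor (p))      */
  memcpy (both + VS, floor_p + VS, VS); /* lsq = *(floor (p) + VS) */
  memcpy (result, both + rt, VS);       /* realign_load (msq, lsq, rt) */
}

int
main (void)
{
  static unsigned char buf[4 * VS] __attribute__ ((aligned (VS)));
  unsigned char out[VS];
  unsigned int i;

  for (i = 0; i < sizeof buf; i++)
    buf[i] = (unsigned char) i;
  model_realign_load (buf + 5, out);    /* out now holds bytes 5 .. 20 */
  return out[0] == 5 && out[VS - 1] == 5 + VS - 1 ? 0 : 1;
}
#endif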
4457 /* Function vect_grouped_load_supported.
4459 Returns TRUE if even and odd permutations are supported,
4460 and FALSE otherwise. */
4462 bool
4463 vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4465 enum machine_mode mode = TYPE_MODE (vectype);
4467 /* vect_permute_load_chain requires the group size to be a power of two. */
4468 if (exact_log2 (count) == -1)
4470 if (vect_print_dump_info (REPORT_DETAILS))
4471 fprintf (vect_dump, "the size of the group of accesses"
4472 " is not a power of 2");
4473 return false;
4476 /* Check that the permutation is supported. */
4477 if (VECTOR_MODE_P (mode))
4479 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4480 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4482 for (i = 0; i < nelt; i++)
4483 sel[i] = i * 2;
4484 if (can_vec_perm_p (mode, false, sel))
4486 for (i = 0; i < nelt; i++)
4487 sel[i] = i * 2 + 1;
4488 if (can_vec_perm_p (mode, false, sel))
4489 return true;
4493 if (vect_print_dump_info (REPORT_DETAILS))
4494 fprintf (vect_dump, "extract even/odd not supported by target");
4495 return false;
4498 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4499 type VECTYPE. */
4501 bool
4502 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4504 return vect_lanes_optab_supported_p ("vec_load_lanes",
4505 vec_load_lanes_optab,
4506 vectype, count);
4509 /* Function vect_permute_load_chain.
4511 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4512 a power of 2, generate extract_even/odd stmts to reorder the input data
4513 correctly. Return the final references for loads in RESULT_CHAIN.
4515 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4516 The input is 4 vectors each containing 8 elements. We assign a number to each
4517 element, the input sequence is:
4519 1st vec: 0 1 2 3 4 5 6 7
4520 2nd vec: 8 9 10 11 12 13 14 15
4521 3rd vec: 16 17 18 19 20 21 22 23
4522 4th vec: 24 25 26 27 28 29 30 31
4524 The output sequence should be:
4526 1st vec: 0 4 8 12 16 20 24 28
4527 2nd vec: 1 5 9 13 17 21 25 29
4528 3rd vec: 2 6 10 14 18 22 26 30
4529 4th vec: 3 7 11 15 19 23 27 31
4531 i.e., the first output vector should contain the first elements of each
4532 interleaving group, etc.
4534 We use extract_even/odd instructions to create such output. The input of
4535 each extract_even/odd operation is two vectors
4536 1st vec 2nd vec
4537 0 1 2 3 4 5 6 7
4539 and the output is the vector of extracted even/odd elements. The output of
4540 extract_even will be: 0 2 4 6
4541 and of extract_odd: 1 3 5 7
4544 The permutation is done in log LENGTH stages. In each stage extract_even
4545 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4546 their order. In our example,
4548 E1: extract_even (1st vec, 2nd vec)
4549 E2: extract_odd (1st vec, 2nd vec)
4550 E3: extract_even (3rd vec, 4th vec)
4551 E4: extract_odd (3rd vec, 4th vec)
4553 The output for the first stage will be:
4555 E1: 0 2 4 6 8 10 12 14
4556 E2: 1 3 5 7 9 11 13 15
4557 E3: 16 18 20 22 24 26 28 30
4558 E4: 17 19 21 23 25 27 29 31
4560 In order to proceed and create the correct sequence for the next stage (or
4561 for the correct output, if the second stage is the last one, as in our
4562 example), we first put the output of extract_even operation and then the
4563 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4564 The input for the second stage is:
4566 1st vec (E1): 0 2 4 6 8 10 12 14
4567 2nd vec (E3): 16 18 20 22 24 26 28 30
4568 3rd vec (E2): 1 3 5 7 9 11 13 15
4569 4th vec (E4): 17 19 21 23 25 27 29 31
4571 The output of the second stage:
4573 E1: 0 4 8 12 16 20 24 28
4574 E2: 2 6 10 14 18 22 26 30
4575 E3: 1 5 9 13 17 21 25 29
4576 E4: 3 7 11 15 19 23 27 31
4578 And RESULT_CHAIN after reordering:
4580 1st vec (E1): 0 4 8 12 16 20 24 28
4581 2nd vec (E3): 1 5 9 13 17 21 25 29
4582 3rd vec (E2): 2 6 10 14 18 22 26 30
4583 4th vec (E4): 3 7 11 15 19 23 27 31. */
4585 static void
4586 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4587 unsigned int length,
4588 gimple stmt,
4589 gimple_stmt_iterator *gsi,
4590 VEC(tree,heap) **result_chain)
4592 tree data_ref, first_vect, second_vect;
4593 tree perm_mask_even, perm_mask_odd;
4594 gimple perm_stmt;
4595 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4596 unsigned int i, j, log_length = exact_log2 (length);
4597 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4598 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4600 *result_chain = VEC_copy (tree, heap, dr_chain);
4602 for (i = 0; i < nelt; ++i)
4603 sel[i] = i * 2;
4604 perm_mask_even = vect_gen_perm_mask (vectype, sel);
4605 gcc_assert (perm_mask_even != NULL);
4607 for (i = 0; i < nelt; ++i)
4608 sel[i] = i * 2 + 1;
4609 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4610 gcc_assert (perm_mask_odd != NULL);
4612 for (i = 0; i < log_length; i++)
4614 for (j = 0; j < length; j += 2)
4616 first_vect = VEC_index (tree, dr_chain, j);
4617 second_vect = VEC_index (tree, dr_chain, j+1);
4619 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4620 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
4621 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4622 first_vect, second_vect,
4623 perm_mask_even);
4624 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4625 VEC_replace (tree, *result_chain, j/2, data_ref);
4627 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4628 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
4629 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4630 first_vect, second_vect,
4631 perm_mask_odd);
4632 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4633 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4635 dr_chain = VEC_copy (tree, heap, *result_chain);
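/* Illustrative sketch, not part of GCC: a standalone simulation of the
   extract-even/odd network above on element indices, for LENGTH == 4 and a
   hypothetical 8-element vector type.  Running it reproduces the reordered
   RESULT_CHAIN documented in the comment (0 4 8 12 ..., 1 5 9 13 ..., ...).
   Kept disabled so it does not affect this file.  */
#if 0
#include <stdio.h>
#include <string.h>

#define NELT 8
#define LENGTH 4

/* out[i] = sel[i] < NELT ? v1[sel[i]] : v2[sel[i] - NELT], i.e. VEC_PERM_EXPR.  */
static void
vec_perm (const int *v1, const int *v2, const unsigned char *sel, int *out)
{
  int i;
  for (i = 0; i < NELT; i++)
    out[i] = sel[i] < NELT ? v1[sel[i]] : v2[sel[i] - NELT];
}

int
main (void)
{
  int chain[LENGTH][NELT], result[LENGTH][NELT];
  unsigned char sel_even[NELT], sel_odd[NELT];
  int i, j, k, stage;

  for (i = 0; i < LENGTH; i++)
    for (j = 0; j < NELT; j++)
      chain[i][j] = i * NELT + j;

  for (i = 0; i < NELT; i++)
    {
      sel_even[i] = i * 2;
      sel_odd[i] = i * 2 + 1;
    }

  for (stage = 0; stage < 2; stage++)   /* log2 (LENGTH) stages */
    {
      for (j = 0; j < LENGTH; j += 2)
        {
          /* Even result goes to slot j/2, odd to slot j/2 + LENGTH/2,
             matching the placement in vect_permute_load_chain.  */
          vec_perm (chain[j], chain[j + 1], sel_even, result[j / 2]);
          vec_perm (chain[j], chain[j + 1], sel_odd, result[j / 2 + LENGTH / 2]);
        }
      memcpy (chain, result, sizeof chain);
    }

  for (i = 0; i < LENGTH; i++)
    {
      for (k = 0; k < NELT; k++)
        printf (" %2d", chain[i][k]);
      printf ("\n");
    }
  return 0;
}
#endif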
4640 /* Function vect_transform_grouped_load.
4642 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4643 to perform their permutation and ascribe the result vectorized statements to
4644 the scalar statements.
4647 void
4648 vect_transform_grouped_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4649 gimple_stmt_iterator *gsi)
4651 VEC(tree,heap) *result_chain = NULL;
4653 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4654 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4655 vectors, that are ready for vector computation. */
4656 result_chain = VEC_alloc (tree, heap, size);
4657 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4658 vect_record_grouped_load_vectors (stmt, result_chain);
4659 VEC_free (tree, heap, result_chain);
4662 /* RESULT_CHAIN contains the output of a group of grouped loads that were
4663 generated as part of the vectorization of STMT. Assign the statement
4664 for each vector to the associated scalar statement. */
4666 void
4667 vect_record_grouped_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4669 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4670 gimple next_stmt, new_stmt;
4671 unsigned int i, gap_count;
4672 tree tmp_data_ref;
4674 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4675 Since we scan the chain starting from its first node, their order
4676 corresponds to the order of data-refs in RESULT_CHAIN. */
4677 next_stmt = first_stmt;
4678 gap_count = 1;
4679 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4681 if (!next_stmt)
4682 break;
4684 /* Skip the gaps. Loads created for the gaps will be removed by dead
4685 code elimination pass later. No need to check for the first stmt in
4686 the group, since it always exists.
4687 GROUP_GAP is the number of steps in elements from the previous
4688 access (if there is no gap GROUP_GAP is 1). We skip loads that
4689 correspond to the gaps. */
4690 if (next_stmt != first_stmt
4691 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4693 gap_count++;
4694 continue;
4697 while (next_stmt)
4699 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4700 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4701 copies, and we put the new vector statement in the first available
4702 RELATED_STMT. */
4703 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4704 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4705 else
4707 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4709 gimple prev_stmt =
4710 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4711 gimple rel_stmt =
4712 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4713 while (rel_stmt)
4715 prev_stmt = rel_stmt;
4716 rel_stmt =
4717 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4720 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4721 new_stmt;
4725 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4726 gap_count = 1;
4727 /* If NEXT_STMT accesses the same DR as the previous statement,
4728 put the same TMP_DATA_REF as its vectorized statement; otherwise
4729 get the next data-ref from RESULT_CHAIN. */
4730 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4731 break;
4736 /* Function vect_can_force_dr_alignment_p.
4738 Returns whether the alignment of a DECL can be forced to be aligned
4739 on ALIGNMENT bit boundary. */
4741 bool
4742 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4744 if (TREE_CODE (decl) != VAR_DECL)
4745 return false;
4747 /* We cannot change alignment of common or external symbols as another
4748 translation unit may contain a definition with lower alignment.
4749 The rules of common symbol linking mean that the definition
4750 will override the common symbol. */
4751 if (DECL_EXTERNAL (decl)
4752 || DECL_COMMON (decl))
4753 return false;
4755 if (TREE_ASM_WRITTEN (decl))
4756 return false;
4758 /* Do not override the alignment as specified by the ABI when the used
4759 attribute is set. */
4760 if (DECL_PRESERVE_P (decl))
4761 return false;
4763 if (TREE_STATIC (decl))
4764 return (alignment <= MAX_OFILE_ALIGNMENT);
4765 else
4766 return (alignment <= MAX_STACK_ALIGNMENT);
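/* Illustrative sketch, not part of GCC: user-level declarations showing the
   distinctions tested above.  The names are hypothetical, and the code is
   kept disabled so it does not affect this file.  */
#if 0
static float local_a[256];    /* TREE_STATIC, neither external nor common:
                                 its alignment may be forced, up to
                                 MAX_OFILE_ALIGNMENT.                      */
extern float extern_b[256];   /* DECL_EXTERNAL: another translation unit
                                 may define it with a lower alignment, so
                                 it is never forced.                       */
float common_c[256];          /* a tentative definition may become a
                                 DECL_COMMON symbol, which is likewise
                                 left alone.                               */

float
sum_local (void)
{
  float s = 0;
  int i;
  for (i = 0; i < 256; i++)
    s += local_a[i];
  return s;
}
#endif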
4770 /* Return whether the data reference DR is supported with respect to its
4771 alignment.
4772 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4773 if it is aligned, i.e., check if it is possible to vectorize it with different
4774 alignment. */
4776 enum dr_alignment_support
4777 vect_supportable_dr_alignment (struct data_reference *dr,
4778 bool check_aligned_accesses)
4780 gimple stmt = DR_STMT (dr);
4781 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4782 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4783 enum machine_mode mode = TYPE_MODE (vectype);
4784 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4785 struct loop *vect_loop = NULL;
4786 bool nested_in_vect_loop = false;
4788 if (aligned_access_p (dr) && !check_aligned_accesses)
4789 return dr_aligned;
4791 if (loop_vinfo)
4793 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4794 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4797 /* Possibly unaligned access. */
4799 /* We can choose between using the implicit realignment scheme (generating
4800 a misaligned_move stmt) and the explicit realignment scheme (generating
4801 aligned loads with a REALIGN_LOAD). There are two variants to the
4802 explicit realignment scheme: optimized, and unoptimized.
4803 We can optimize the realignment only if the step between consecutive
4804 vector loads is equal to the vector size. Since the vector memory
4805 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4806 is guaranteed that the misalignment amount remains the same throughout the
4807 execution of the vectorized loop. Therefore, we can create the
4808 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4809 at the loop preheader.
4811 However, in the case of outer-loop vectorization, when vectorizing a
4812 memory access in the inner-loop nested within the LOOP that is now being
4813 vectorized, while it is guaranteed that the misalignment of the
4814 vectorized memory access will remain the same in different outer-loop
4815 iterations, it is *not* guaranteed that it will remain the same throughout
4816 the execution of the inner-loop. This is because the inner-loop advances
4817 with the original scalar step (and not in steps of VS). If the inner-loop
4818 step happens to be a multiple of VS, then the misalignment remains fixed
4819 and we can use the optimized realignment scheme. For example:
4821 for (i=0; i<N; i++)
4822 for (j=0; j<M; j++)
4823 s += a[i+j];
4825 When vectorizing the i-loop in the above example, the step between
4826 consecutive vector loads is 1, and so the misalignment does not remain
4827 fixed across the execution of the inner-loop, and the realignment cannot
4828 be optimized (as illustrated in the following pseudo vectorized loop):
4830 for (i=0; i<N; i+=4)
4831 for (j=0; j<M; j++){
4832 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4833 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4834 // (assuming that we start from an aligned address).
4837 We therefore have to use the unoptimized realignment scheme:
4839 for (i=0; i<N; i+=4)
4840 for (j=k; j<M; j+=4)
4841 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4842 // that the misalignment of the initial address is
4843 // 0).
4845 The loop can then be vectorized as follows:
4847 for (k=0; k<4; k++){
4848 rt = get_realignment_token (&vp[k]);
4849 for (i=0; i<N; i+=4){
4850 v1 = vp[i+k];
4851 for (j=k; j<M; j+=4){
4852 v2 = vp[i+j+VS-1];
4853 va = REALIGN_LOAD <v1,v2,rt>;
4854 vs += va;
4855 v1 = v2;
4858 } */
4860 if (DR_IS_READ (dr))
4862 bool is_packed = false;
4863 tree type = (TREE_TYPE (DR_REF (dr)));
4865 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4866 && (!targetm.vectorize.builtin_mask_for_load
4867 || targetm.vectorize.builtin_mask_for_load ()))
4869 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4870 if ((nested_in_vect_loop
4871 && (TREE_INT_CST_LOW (DR_STEP (dr))
4872 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4873 || !loop_vinfo)
4874 return dr_explicit_realign;
4875 else
4876 return dr_explicit_realign_optimized;
4878 if (!known_alignment_for_access_p (dr))
4879 is_packed = not_size_aligned (DR_REF (dr));
4881 if (targetm.vectorize.
4882 support_vector_misalignment (mode, type,
4883 DR_MISALIGNMENT (dr), is_packed))
4884 /* Can't software pipeline the loads, but can at least do them. */
4885 return dr_unaligned_supported;
4887 else
4889 bool is_packed = false;
4890 tree type = (TREE_TYPE (DR_REF (dr)));
4892 if (!known_alignment_for_access_p (dr))
4893 is_packed = not_size_aligned (DR_REF (dr));
4895 if (targetm.vectorize.
4896 support_vector_misalignment (mode, type,
4897 DR_MISALIGNMENT (dr), is_packed))
4898 return dr_unaligned_supported;
4901 /* Unsupported. */
4902 return dr_unaligned_unsupported;
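/* Illustrative sketch, not part of GCC: a small program showing why the
   optimized realignment scheme needs the inner-loop step to equal the
   vector size, as discussed in the comment above.  It prints the
   misalignment (in elements, modulo VF == 4) of &vp[i+j] across inner-loop
   iterations for the "s += a[i+j]" example; the values cycle 0,1,2,3,...
   instead of staying fixed.  Kept disabled so it does not affect this
   file.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int i, j;

  for (i = 0; i < 8; i += 4)            /* vectorized outer loop, VF = 4 */
    {
      printf ("i = %d:", i);
      for (j = 0; j < 8; j++)           /* scalar inner loop, step of one element */
        printf (" %d", (i + j) % 4);    /* misalignment of &vp[i+j], in elements */
      printf ("\n");
    }
  return 0;
}
#endif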