2012-08-10 Richard Guenther <rguenther@suse.de>
[official-gcc.git] / gcc / tree-vect-data-refs.c
blob6a02986cc80069a79a0a6bb3deae17f0c7436727
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "dumpfile.h"
35 #include "cfgloop.h"
36 #include "tree-chrec.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-vectorizer.h"
39 #include "diagnostic-core.h"
41 /* Need to include rtl.h, expr.h, etc. for optabs. */
42 #include "expr.h"
43 #include "optabs.h"
45 /* Return true if load- or store-lanes optab OPTAB is implemented for
46 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
48 static bool
49 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
50 tree vectype, unsigned HOST_WIDE_INT count)
52 enum machine_mode mode, array_mode;
53 bool limit_p;
55 mode = TYPE_MODE (vectype);
56 limit_p = !targetm.array_mode_supported_p (mode, count);
57 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
58 MODE_INT, limit_p);
60 if (array_mode == BLKmode)
62 if (vect_print_dump_info (REPORT_DETAILS))
63 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
64 GET_MODE_NAME (mode), count);
65 return false;
68 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
70 if (vect_print_dump_info (REPORT_DETAILS))
71 fprintf (vect_dump, "cannot use %s<%s><%s>",
72 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
73 return false;
76 if (vect_print_dump_info (REPORT_DETAILS))
77 fprintf (vect_dump, "can use %s<%s><%s>",
78 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
80 return true;
84 /* Return the smallest scalar part of STMT.
85 This is used to determine the vectype of the stmt. We generally set the
86 vectype according to the type of the result (lhs). For stmts whose
87 result-type is different than the type of the arguments (e.g., demotion,
88 promotion), vectype will be reset appropriately (later). Note that we have
89 to visit the smallest datatype in this function, because that determines the
90 VF. If the smallest datatype in the loop is present only as the rhs of a
91 promotion operation - we'd miss it.
92 Such a case, where a variable of this datatype does not appear in the lhs
93 anywhere in the loop, can only occur if it's an invariant: e.g.:
94 'int_x = (int) short_inv', which we'd expect to have been optimized away by
95 invariant motion. However, we cannot rely on invariant motion to always
96 take invariants out of the loop, and so in the case of promotion we also
97 have to check the rhs.
98 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
99 types. */
101 tree
102 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
103 HOST_WIDE_INT *rhs_size_unit)
105 tree scalar_type = gimple_expr_type (stmt);
106 HOST_WIDE_INT lhs, rhs;
108 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
110 if (is_gimple_assign (stmt)
111 && (gimple_assign_cast_p (stmt)
112 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
113 || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
114 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
116 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
118 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
119 if (rhs < lhs)
120 scalar_type = rhs_type;
123 *lhs_size_unit = lhs;
124 *rhs_size_unit = rhs;
125 return scalar_type;
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
135 gimple next_stmt = first_stmt;
136 int result = 0;
138 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
139 return -1;
141 while (next_stmt && next_stmt != stmt)
143 result++;
144 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
147 if (next_stmt)
148 return result;
149 else
150 return -1;
154 /* Function vect_insert_into_interleaving_chain.
156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
158 static void
159 vect_insert_into_interleaving_chain (struct data_reference *dra,
160 struct data_reference *drb)
162 gimple prev, next;
163 tree next_init;
164 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
167 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
169 while (next)
171 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
174 /* Insert here. */
175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
177 return;
179 prev = next;
180 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
183 /* We got to the end of the list. Insert here. */
184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
189 /* Function vect_update_interleaving_chain.
191 For two data-refs DRA and DRB that are a part of a chain interleaved data
192 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
194 There are four possible cases:
195 1. New stmts - both DRA and DRB are not a part of any chain:
196 FIRST_DR = DRB
197 NEXT_DR (DRB) = DRA
198 2. DRB is a part of a chain and DRA is not:
199 no need to update FIRST_DR
200 no need to insert DRB
201 insert DRA according to init
202 3. DRA is a part of a chain and DRB is not:
203 if (init of FIRST_DR > init of DRB)
204 FIRST_DR = DRB
205 NEXT(FIRST_DR) = previous FIRST_DR
206 else
207 insert DRB according to its init
208 4. both DRA and DRB are in some interleaving chains:
209 choose the chain with the smallest init of FIRST_DR
210 insert the nodes of the second chain into the first one. */
212 static void
213 vect_update_interleaving_chain (struct data_reference *drb,
214 struct data_reference *dra)
216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218 tree next_init, init_dra_chain, init_drb_chain;
219 gimple first_a, first_b;
220 tree node_init;
221 gimple node, prev, next, first_stmt;
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
229 return;
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra, drb);
238 return;
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
246 old_first_stmt)));
247 gimple tmp;
249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp = old_first_stmt;
259 while (tmp)
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
265 else
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb, dra);
269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
271 return;
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277 if (first_a == first_b)
278 return;
279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
284 /* Insert the nodes of DRA chain into the DRB chain.
285 After inserting a node, continue from this node of the DRB chain (don't
286 start from the beginning. */
287 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289 first_stmt = first_b;
291 else
293 /* Insert the nodes of DRB chain into the DRA chain.
294 After inserting a node, continue from this node of the DRA chain (don't
295 start from the beginning. */
296 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298 first_stmt = first_a;
301 while (node)
303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
305 while (next)
307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 if (tree_int_cst_compare (next_init, node_init) > 0)
310 /* Insert here. */
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
313 prev = node;
314 break;
316 prev = next;
317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
319 if (!next)
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
324 prev = node;
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
331 /* Check dependence between DRA and DRB for basic block vectorization.
332 If the accesses share same bases and offsets, we can compare their initial
333 constant offsets to decide whether they differ or not. In case of a read-
334 write dependence we check that the load is before the store to ensure that
335 vectorization will not change the order of the accesses. */
337 static bool
338 vect_drs_dependent_in_basic_block (struct data_reference *dra,
339 struct data_reference *drb)
341 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
342 gimple earlier_stmt;
344 /* We only call this function for pairs of loads and stores, but we verify
345 it here. */
346 if (DR_IS_READ (dra) == DR_IS_READ (drb))
348 if (DR_IS_READ (dra))
349 return false;
350 else
351 return true;
354 /* Check that the data-refs have same bases and offsets. If not, we can't
355 determine if they are dependent. */
356 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357 || !dr_equal_offsets_p (dra, drb))
358 return true;
360 /* Check the types. */
361 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
364 if (type_size_a != type_size_b
365 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366 TREE_TYPE (DR_REF (drb))))
367 return true;
369 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
372 /* Two different locations - no dependence. */
373 if (init_a != init_b)
374 return false;
376 /* We have a read-write dependence. Check that the load is before the store.
377 When we vectorize basic blocks, vector load can be only before
378 corresponding scalar load, and vector store can be only after its
379 corresponding scalar store. So the order of the acceses is preserved in
380 case the load is before the store. */
381 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
383 return false;
385 return true;
389 /* Function vect_check_interleaving.
391 Check if DRA and DRB are a part of interleaving. In case they are, insert
392 DRA and DRB in an interleaving chain. */
394 static bool
395 vect_check_interleaving (struct data_reference *dra,
396 struct data_reference *drb)
398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
400 /* Check that the data-refs have same first location (except init) and they
401 are both either store or load (not load and store). */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403 || !dr_equal_offsets_p (dra, drb)
404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405 || DR_IS_READ (dra) != DR_IS_READ (drb))
406 return false;
408 /* Check:
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between
412 data-refs' inits. */
413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
416 if (type_size_a != type_size_b
417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419 TREE_TYPE (DR_REF (drb))))
420 return false;
422 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424 step = TREE_INT_CST_LOW (DR_STEP (dra));
426 if (init_a > init_b)
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size = (init_a - init_b) % type_size_a;
432 if (step && (init_a - init_b) > step)
433 return false;
435 if (diff_mod_size == 0)
437 vect_update_interleaving_chain (drb, dra);
438 if (vect_print_dump_info (REPORT_DR_DETAILS))
440 fprintf (vect_dump, "Detected interleaving ");
441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 fprintf (vect_dump, " and ");
443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
445 return true;
448 else
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size = (init_b - init_a) % type_size_a;
454 if (step && (init_b - init_a) > step)
455 return false;
457 if (diff_mod_size == 0)
459 vect_update_interleaving_chain (dra, drb);
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 fprintf (vect_dump, "Detected interleaving ");
463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
467 return true;
471 return false;
474 /* Check if data references pointed by DR_I and DR_J are same or
475 belong to same interleaving group. Return FALSE if drs are
476 different, otherwise return TRUE. */
478 static bool
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
481 gimple stmt_i = DR_STMT (dr_i);
482 gimple stmt_j = DR_STMT (dr_j);
484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
489 return true;
490 else
491 return false;
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
497 static bool
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
504 return true;
505 else
506 return false;
509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
510 tested at run-time. Return TRUE if DDR was successfully inserted.
511 Return false if versioning is not supported. */
513 static bool
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
519 return false;
521 if (vect_print_dump_info (REPORT_DR_DETAILS))
523 fprintf (vect_dump, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525 fprintf (vect_dump, " and ");
526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
529 if (optimize_loop_nest_for_size_p (loop))
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning not supported when optimizing for size.");
533 return false;
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
537 if (loop->inner)
539 if (vect_print_dump_info (REPORT_DR_DETAILS))
540 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
541 return false;
544 /* FORNOW: We don't support creating runtime alias tests for non-constant
545 step. */
546 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
547 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
549 if (vect_print_dump_info (REPORT_DR_DETAILS))
550 fprintf (vect_dump, "versioning not yet supported for non-constant "
551 "step");
552 return false;
555 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
556 return true;
560 /* Function vect_analyze_data_ref_dependence.
562 Return TRUE if there (might) exist a dependence between a memory-reference
563 DRA and a memory-reference DRB. When versioning for alias may check a
564 dependence at run-time, return FALSE. Adjust *MAX_VF according to
565 the data dependence. */
567 static bool
568 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
569 loop_vec_info loop_vinfo, int *max_vf)
571 unsigned int i;
572 struct loop *loop = NULL;
573 struct data_reference *dra = DDR_A (ddr);
574 struct data_reference *drb = DDR_B (ddr);
575 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
576 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
577 lambda_vector dist_v;
578 unsigned int loop_depth;
580 /* Don't bother to analyze statements marked as unvectorizable. */
581 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
582 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
583 return false;
585 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
587 /* Independent data accesses. */
588 vect_check_interleaving (dra, drb);
589 return false;
592 if (loop_vinfo)
593 loop = LOOP_VINFO_LOOP (loop_vinfo);
595 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
596 return false;
598 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
600 gimple earlier_stmt;
602 if (loop_vinfo)
604 if (vect_print_dump_info (REPORT_DR_DETAILS))
606 fprintf (vect_dump, "versioning for alias required: "
607 "can't determine dependence between ");
608 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
609 fprintf (vect_dump, " and ");
610 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
613 /* Add to list of ddrs that need to be tested at run-time. */
614 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
617 /* When vectorizing a basic block unknown depnedence can still mean
618 grouped access. */
619 if (vect_check_interleaving (dra, drb))
620 return false;
622 /* Read-read is OK (we need this check here, after checking for
623 interleaving). */
624 if (DR_IS_READ (dra) && DR_IS_READ (drb))
625 return false;
627 if (vect_print_dump_info (REPORT_DR_DETAILS))
629 fprintf (vect_dump, "can't determine dependence between ");
630 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
631 fprintf (vect_dump, " and ");
632 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
635 /* We do not vectorize basic blocks with write-write dependencies. */
636 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
637 return true;
639 /* Check that it's not a load-after-store dependence. */
640 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
641 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
642 return true;
644 return false;
647 /* Versioning for alias is not yet supported for basic block SLP, and
648 dependence distance is unapplicable, hence, in case of known data
649 dependence, basic block vectorization is impossible for now. */
650 if (!loop_vinfo)
652 if (dra != drb && vect_check_interleaving (dra, drb))
653 return false;
655 if (vect_print_dump_info (REPORT_DR_DETAILS))
657 fprintf (vect_dump, "determined dependence between ");
658 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
659 fprintf (vect_dump, " and ");
660 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
663 /* Do not vectorize basic blcoks with write-write dependences. */
664 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
665 return true;
667 /* Check if this dependence is allowed in basic block vectorization. */
668 return vect_drs_dependent_in_basic_block (dra, drb);
671 /* Loop-based vectorization and known data dependence. */
672 if (DDR_NUM_DIST_VECTS (ddr) == 0)
674 if (vect_print_dump_info (REPORT_DR_DETAILS))
676 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
677 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
678 fprintf (vect_dump, " and ");
679 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
681 /* Add to list of ddrs that need to be tested at run-time. */
682 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
685 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
686 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
688 int dist = dist_v[loop_depth];
690 if (vect_print_dump_info (REPORT_DR_DETAILS))
691 fprintf (vect_dump, "dependence distance = %d.", dist);
693 if (dist == 0)
695 if (vect_print_dump_info (REPORT_DR_DETAILS))
697 fprintf (vect_dump, "dependence distance == 0 between ");
698 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
699 fprintf (vect_dump, " and ");
700 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
703 /* For interleaving, mark that there is a read-write dependency if
704 necessary. We check before that one of the data-refs is store. */
705 if (DR_IS_READ (dra))
706 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
707 else
709 if (DR_IS_READ (drb))
710 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
713 continue;
716 if (dist > 0 && DDR_REVERSED_P (ddr))
718 /* If DDR_REVERSED_P the order of the data-refs in DDR was
719 reversed (to make distance vector positive), and the actual
720 distance is negative. */
721 if (vect_print_dump_info (REPORT_DR_DETAILS))
722 fprintf (vect_dump, "dependence distance negative.");
723 continue;
726 if (abs (dist) >= 2
727 && abs (dist) < *max_vf)
729 /* The dependence distance requires reduction of the maximal
730 vectorization factor. */
731 *max_vf = abs (dist);
732 if (vect_print_dump_info (REPORT_DR_DETAILS))
733 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
734 *max_vf);
737 if (abs (dist) >= *max_vf)
739 /* Dependence distance does not create dependence, as far as
740 vectorization is concerned, in this case. */
741 if (vect_print_dump_info (REPORT_DR_DETAILS))
742 fprintf (vect_dump, "dependence distance >= VF.");
743 continue;
746 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
748 fprintf (vect_dump, "not vectorized, possible dependence "
749 "between data-refs ");
750 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
751 fprintf (vect_dump, " and ");
752 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
755 return true;
758 return false;
761 /* Function vect_analyze_data_ref_dependences.
763 Examine all the data references in the loop, and make sure there do not
764 exist any data dependences between them. Set *MAX_VF according to
765 the maximum vectorization factor the data dependences allow. */
767 bool
768 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
769 bb_vec_info bb_vinfo, int *max_vf)
771 unsigned int i;
772 VEC (ddr_p, heap) *ddrs = NULL;
773 struct data_dependence_relation *ddr;
775 if (vect_print_dump_info (REPORT_DETAILS))
776 fprintf (vect_dump, "=== vect_analyze_dependences ===");
778 if (loop_vinfo)
779 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
780 else
781 ddrs = BB_VINFO_DDRS (bb_vinfo);
783 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
784 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
785 return false;
787 return true;
791 /* Function vect_compute_data_ref_alignment
793 Compute the misalignment of the data reference DR.
795 Output:
796 1. If during the misalignment computation it is found that the data reference
797 cannot be vectorized then false is returned.
798 2. DR_MISALIGNMENT (DR) is defined.
800 FOR NOW: No analysis is actually performed. Misalignment is calculated
801 only for trivial cases. TODO. */
803 static bool
804 vect_compute_data_ref_alignment (struct data_reference *dr)
806 gimple stmt = DR_STMT (dr);
807 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
808 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
809 struct loop *loop = NULL;
810 tree ref = DR_REF (dr);
811 tree vectype;
812 tree base, base_addr;
813 bool base_aligned;
814 tree misalign;
815 tree aligned_to, alignment;
817 if (vect_print_dump_info (REPORT_DETAILS))
818 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
820 if (loop_vinfo)
821 loop = LOOP_VINFO_LOOP (loop_vinfo);
823 /* Initialize misalignment to unknown. */
824 SET_DR_MISALIGNMENT (dr, -1);
826 /* Strided loads perform only component accesses, misalignment information
827 is irrelevant for them. */
828 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
829 return true;
831 misalign = DR_INIT (dr);
832 aligned_to = DR_ALIGNED_TO (dr);
833 base_addr = DR_BASE_ADDRESS (dr);
834 vectype = STMT_VINFO_VECTYPE (stmt_info);
836 /* In case the dataref is in an inner-loop of the loop that is being
837 vectorized (LOOP), we use the base and misalignment information
838 relative to the outer-loop (LOOP). This is ok only if the misalignment
839 stays the same throughout the execution of the inner-loop, which is why
840 we have to check that the stride of the dataref in the inner-loop evenly
841 divides by the vector size. */
842 if (loop && nested_in_vect_loop_p (loop, stmt))
844 tree step = DR_STEP (dr);
845 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
847 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
849 if (vect_print_dump_info (REPORT_ALIGNMENT))
850 fprintf (vect_dump, "inner step divides the vector-size.");
851 misalign = STMT_VINFO_DR_INIT (stmt_info);
852 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
853 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
855 else
857 if (vect_print_dump_info (REPORT_ALIGNMENT))
858 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
859 misalign = NULL_TREE;
863 /* Similarly, if we're doing basic-block vectorization, we can only use
864 base and misalignment information relative to an innermost loop if the
865 misalignment stays the same throughout the execution of the loop.
866 As above, this is the case if the stride of the dataref evenly divides
867 by the vector size. */
868 if (!loop)
870 tree step = DR_STEP (dr);
871 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
873 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
875 if (vect_print_dump_info (REPORT_ALIGNMENT))
876 fprintf (vect_dump, "SLP: step doesn't divide the vector-size.");
877 misalign = NULL_TREE;
881 base = build_fold_indirect_ref (base_addr);
882 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
884 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
885 || !misalign)
887 if (vect_print_dump_info (REPORT_ALIGNMENT))
889 fprintf (vect_dump, "Unknown alignment for access: ");
890 print_generic_expr (vect_dump, base, TDF_SLIM);
892 return true;
895 if ((DECL_P (base)
896 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
897 alignment) >= 0)
898 || (TREE_CODE (base_addr) == SSA_NAME
899 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
900 TREE_TYPE (base_addr)))),
901 alignment) >= 0)
902 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
903 base_aligned = true;
904 else
905 base_aligned = false;
907 if (!base_aligned)
909 /* Do not change the alignment of global variables here if
910 flag_section_anchors is enabled as we already generated
911 RTL for other functions. Most global variables should
912 have been aligned during the IPA increase_alignment pass. */
913 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
914 || (TREE_STATIC (base) && flag_section_anchors))
916 if (vect_print_dump_info (REPORT_DETAILS))
918 fprintf (vect_dump, "can't force alignment of ref: ");
919 print_generic_expr (vect_dump, ref, TDF_SLIM);
921 return true;
924 /* Force the alignment of the decl.
925 NOTE: This is the only change to the code we make during
926 the analysis phase, before deciding to vectorize the loop. */
927 if (vect_print_dump_info (REPORT_DETAILS))
929 fprintf (vect_dump, "force alignment of ");
930 print_generic_expr (vect_dump, ref, TDF_SLIM);
933 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
934 DECL_USER_ALIGN (base) = 1;
937 /* At this point we assume that the base is aligned. */
938 gcc_assert (base_aligned
939 || (TREE_CODE (base) == VAR_DECL
940 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
942 /* If this is a backward running DR then first access in the larger
943 vectype actually is N-1 elements before the address in the DR.
944 Adjust misalign accordingly. */
945 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
947 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
948 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
949 otherwise we wouldn't be here. */
950 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
951 /* PLUS because DR_STEP was negative. */
952 misalign = size_binop (PLUS_EXPR, misalign, offset);
955 /* Modulo alignment. */
956 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
958 if (!host_integerp (misalign, 1))
960 /* Negative or overflowed misalignment value. */
961 if (vect_print_dump_info (REPORT_DETAILS))
962 fprintf (vect_dump, "unexpected misalign value");
963 return false;
966 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
968 if (vect_print_dump_info (REPORT_DETAILS))
970 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
971 print_generic_expr (vect_dump, ref, TDF_SLIM);
974 return true;
978 /* Function vect_compute_data_refs_alignment
980 Compute the misalignment of data references in the loop.
981 Return FALSE if a data reference is found that cannot be vectorized. */
983 static bool
984 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
985 bb_vec_info bb_vinfo)
987 VEC (data_reference_p, heap) *datarefs;
988 struct data_reference *dr;
989 unsigned int i;
991 if (loop_vinfo)
992 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
993 else
994 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
996 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
997 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
998 && !vect_compute_data_ref_alignment (dr))
1000 if (bb_vinfo)
1002 /* Mark unsupported statement as unvectorizable. */
1003 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1004 continue;
1006 else
1007 return false;
1010 return true;
1014 /* Function vect_update_misalignment_for_peel
1016 DR - the data reference whose misalignment is to be adjusted.
1017 DR_PEEL - the data reference whose misalignment is being made
1018 zero in the vector loop by the peel.
1019 NPEEL - the number of iterations in the peel loop if the misalignment
1020 of DR_PEEL is known at compile time. */
1022 static void
1023 vect_update_misalignment_for_peel (struct data_reference *dr,
1024 struct data_reference *dr_peel, int npeel)
1026 unsigned int i;
1027 VEC(dr_p,heap) *same_align_drs;
1028 struct data_reference *current_dr;
1029 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1030 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1031 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1032 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1034 /* For interleaved data accesses the step in the loop must be multiplied by
1035 the size of the interleaving group. */
1036 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1037 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1038 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
1039 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1041 /* It can be assumed that the data refs with the same alignment as dr_peel
1042 are aligned in the vector loop. */
1043 same_align_drs
1044 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1045 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1047 if (current_dr != dr)
1048 continue;
1049 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1050 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1051 SET_DR_MISALIGNMENT (dr, 0);
1052 return;
1055 if (known_alignment_for_access_p (dr)
1056 && known_alignment_for_access_p (dr_peel))
1058 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1059 int misal = DR_MISALIGNMENT (dr);
1060 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1061 misal += negative ? -npeel * dr_size : npeel * dr_size;
1062 misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
1063 SET_DR_MISALIGNMENT (dr, misal);
1064 return;
1067 if (vect_print_dump_info (REPORT_DETAILS))
1068 fprintf (vect_dump, "Setting misalignment to -1.");
1069 SET_DR_MISALIGNMENT (dr, -1);
1073 /* Function vect_verify_datarefs_alignment
1075 Return TRUE if all data references in the loop can be
1076 handled with respect to alignment. */
1078 bool
1079 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1081 VEC (data_reference_p, heap) *datarefs;
1082 struct data_reference *dr;
1083 enum dr_alignment_support supportable_dr_alignment;
1084 unsigned int i;
1086 if (loop_vinfo)
1087 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1088 else
1089 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1091 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1093 gimple stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1096 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1097 continue;
1099 /* For interleaving, only the alignment of the first access matters.
1100 Skip statements marked as not vectorizable. */
1101 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
1102 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1103 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1104 continue;
1106 /* Strided loads perform only component accesses, alignment is
1107 irrelevant for them. */
1108 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1109 continue;
1111 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1112 if (!supportable_dr_alignment)
1114 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1116 if (DR_IS_READ (dr))
1117 fprintf (vect_dump,
1118 "not vectorized: unsupported unaligned load.");
1119 else
1120 fprintf (vect_dump,
1121 "not vectorized: unsupported unaligned store.");
1123 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1125 return false;
1127 if (supportable_dr_alignment != dr_aligned
1128 && vect_print_dump_info (REPORT_ALIGNMENT))
1129 fprintf (vect_dump, "Vectorizing an unaligned access.");
1131 return true;
1134 /* Given an memory reference EXP return whether its alignment is less
1135 than its size. */
1137 static bool
1138 not_size_aligned (tree exp)
1140 if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp)), 1))
1141 return true;
1143 return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
1144 > get_object_alignment (exp));
1147 /* Function vector_alignment_reachable_p
1149 Return true if vector alignment for DR is reachable by peeling
1150 a few loop iterations. Return false otherwise. */
1152 static bool
1153 vector_alignment_reachable_p (struct data_reference *dr)
1155 gimple stmt = DR_STMT (dr);
1156 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1157 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1159 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1161 /* For interleaved access we peel only if number of iterations in
1162 the prolog loop ({VF - misalignment}), is a multiple of the
1163 number of the interleaved accesses. */
1164 int elem_size, mis_in_elements;
1165 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1167 /* FORNOW: handle only known alignment. */
1168 if (!known_alignment_for_access_p (dr))
1169 return false;
1171 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1172 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1174 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1175 return false;
1178 /* If misalignment is known at the compile time then allow peeling
1179 only if natural alignment is reachable through peeling. */
1180 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1182 HOST_WIDE_INT elmsize =
1183 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1184 if (vect_print_dump_info (REPORT_DETAILS))
1186 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1187 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1189 if (DR_MISALIGNMENT (dr) % elmsize)
1191 if (vect_print_dump_info (REPORT_DETAILS))
1192 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1193 return false;
1197 if (!known_alignment_for_access_p (dr))
1199 tree type = TREE_TYPE (DR_REF (dr));
1200 bool is_packed = not_size_aligned (DR_REF (dr));
1201 if (vect_print_dump_info (REPORT_DETAILS))
1202 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1203 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1204 return true;
1205 else
1206 return false;
1209 return true;
1213 /* Calculate the cost of the memory access represented by DR. */
1215 static void
1216 vect_get_data_access_cost (struct data_reference *dr,
1217 unsigned int *inside_cost,
1218 unsigned int *outside_cost,
1219 stmt_vector_for_cost *body_cost_vec)
1221 gimple stmt = DR_STMT (dr);
1222 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1223 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1224 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1225 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1226 int ncopies = vf / nunits;
1228 if (DR_IS_READ (dr))
1229 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1230 NULL, body_cost_vec, false);
1231 else
1232 vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1234 if (vect_print_dump_info (REPORT_COST))
1235 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1236 "outside_cost = %d.", *inside_cost, *outside_cost);
1240 static hashval_t
1241 vect_peeling_hash (const void *elem)
1243 const struct _vect_peel_info *peel_info;
1245 peel_info = (const struct _vect_peel_info *) elem;
1246 return (hashval_t) peel_info->npeel;
1250 static int
1251 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1253 const struct _vect_peel_info *a, *b;
1255 a = (const struct _vect_peel_info *) elem1;
1256 b = (const struct _vect_peel_info *) elem2;
1257 return (a->npeel == b->npeel);
1261 /* Insert DR into peeling hash table with NPEEL as key. */
1263 static void
1264 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1265 int npeel)
1267 struct _vect_peel_info elem, *slot;
1268 void **new_slot;
1269 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1271 elem.npeel = npeel;
1272 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1273 &elem);
1274 if (slot)
1275 slot->count++;
1276 else
1278 slot = XNEW (struct _vect_peel_info);
1279 slot->npeel = npeel;
1280 slot->dr = dr;
1281 slot->count = 1;
1282 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1283 INSERT);
1284 *new_slot = slot;
1287 if (!supportable_dr_alignment && !flag_vect_cost_model)
1288 slot->count += VECT_MAX_COST;
1292 /* Traverse peeling hash table to find peeling option that aligns maximum
1293 number of data accesses. */
1295 static int
1296 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1298 vect_peel_info elem = (vect_peel_info) *slot;
1299 vect_peel_extended_info max = (vect_peel_extended_info) data;
1301 if (elem->count > max->peel_info.count
1302 || (elem->count == max->peel_info.count
1303 && max->peel_info.npeel > elem->npeel))
1305 max->peel_info.npeel = elem->npeel;
1306 max->peel_info.count = elem->count;
1307 max->peel_info.dr = elem->dr;
1310 return 1;
1314 /* Traverse peeling hash table and calculate cost for each peeling option.
1315 Find the one with the lowest cost. */
1317 static int
1318 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1320 vect_peel_info elem = (vect_peel_info) *slot;
1321 vect_peel_extended_info min = (vect_peel_extended_info) data;
1322 int save_misalignment, dummy;
1323 unsigned int inside_cost = 0, outside_cost = 0, i;
1324 gimple stmt = DR_STMT (elem->dr);
1325 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1326 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1327 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1328 struct data_reference *dr;
1329 stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
1330 int single_iter_cost;
1332 prologue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1333 body_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1334 epilogue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1336 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1338 stmt = DR_STMT (dr);
1339 stmt_info = vinfo_for_stmt (stmt);
1340 /* For interleaving, only the alignment of the first access
1341 matters. */
1342 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1343 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1344 continue;
1346 save_misalignment = DR_MISALIGNMENT (dr);
1347 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1348 vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
1349 &body_cost_vec);
1350 SET_DR_MISALIGNMENT (dr, save_misalignment);
1353 single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
1354 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
1355 &dummy, single_iter_cost,
1356 &prologue_cost_vec,
1357 &epilogue_cost_vec);
1359 /* Prologue and epilogue costs are added to the target model later.
1360 These costs depend only on the scalar iteration cost, the
1361 number of peeling iterations finally chosen, and the number of
1362 misaligned statements. So discard the information found here. */
1363 VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);
1364 VEC_free (stmt_info_for_cost, heap, epilogue_cost_vec);
1366 if (inside_cost < min->inside_cost
1367 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1369 min->inside_cost = inside_cost;
1370 min->outside_cost = outside_cost;
1371 min->body_cost_vec = body_cost_vec;
1372 min->peel_info.dr = elem->dr;
1373 min->peel_info.npeel = elem->npeel;
1375 else
1376 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1378 return 1;
1382 /* Choose best peeling option by traversing peeling hash table and either
1383 choosing an option with the lowest cost (if cost model is enabled) or the
1384 option that aligns as many accesses as possible. */
1386 static struct data_reference *
1387 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1388 unsigned int *npeel,
1389 stmt_vector_for_cost *body_cost_vec)
1391 struct _vect_peel_extended_info res;
1393 res.peel_info.dr = NULL;
1394 res.body_cost_vec = NULL;
1396 if (flag_vect_cost_model)
1398 res.inside_cost = INT_MAX;
1399 res.outside_cost = INT_MAX;
1400 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1401 vect_peeling_hash_get_lowest_cost, &res);
1403 else
1405 res.peel_info.count = 0;
1406 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1407 vect_peeling_hash_get_most_frequent, &res);
1410 *npeel = res.peel_info.npeel;
1411 *body_cost_vec = res.body_cost_vec;
1412 return res.peel_info.dr;
1416 /* Function vect_enhance_data_refs_alignment
1418 This pass will use loop versioning and loop peeling in order to enhance
1419 the alignment of data references in the loop.
1421 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1422 original loop is to be vectorized. Any other loops that are created by
1423 the transformations performed in this pass - are not supposed to be
1424 vectorized. This restriction will be relaxed.
1426 This pass will require a cost model to guide it whether to apply peeling
1427 or versioning or a combination of the two. For example, the scheme that
1428 intel uses when given a loop with several memory accesses, is as follows:
1429 choose one memory access ('p') which alignment you want to force by doing
1430 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1431 other accesses are not necessarily aligned, or (2) use loop versioning to
1432 generate one loop in which all accesses are aligned, and another loop in
1433 which only 'p' is necessarily aligned.
1435 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1436 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1437 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1439 Devising a cost model is the most critical aspect of this work. It will
1440 guide us on which access to peel for, whether to use loop versioning, how
1441 many versions to create, etc. The cost model will probably consist of
1442 generic considerations as well as target specific considerations (on
1443 powerpc for example, misaligned stores are more painful than misaligned
1444 loads).
1446 Here are the general steps involved in alignment enhancements:
1448 -- original loop, before alignment analysis:
1449 for (i=0; i<N; i++){
1450 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1451 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1454 -- After vect_compute_data_refs_alignment:
1455 for (i=0; i<N; i++){
1456 x = q[i]; # DR_MISALIGNMENT(q) = 3
1457 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1460 -- Possibility 1: we do loop versioning:
1461 if (p is aligned) {
1462 for (i=0; i<N; i++){ # loop 1A
1463 x = q[i]; # DR_MISALIGNMENT(q) = 3
1464 p[i] = y; # DR_MISALIGNMENT(p) = 0
1467 else {
1468 for (i=0; i<N; i++){ # loop 1B
1469 x = q[i]; # DR_MISALIGNMENT(q) = 3
1470 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1474 -- Possibility 2: we do loop peeling:
1475 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1476 x = q[i];
1477 p[i] = y;
1479 for (i = 3; i < N; i++){ # loop 2A
1480 x = q[i]; # DR_MISALIGNMENT(q) = 0
1481 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1484 -- Possibility 3: combination of loop peeling and versioning:
1485 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1486 x = q[i];
1487 p[i] = y;
1489 if (p is aligned) {
1490 for (i = 3; i<N; i++){ # loop 3A
1491 x = q[i]; # DR_MISALIGNMENT(q) = 0
1492 p[i] = y; # DR_MISALIGNMENT(p) = 0
1495 else {
1496 for (i = 3; i<N; i++){ # loop 3B
1497 x = q[i]; # DR_MISALIGNMENT(q) = 0
1498 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1502 These loops are later passed to loop_transform to be vectorized. The
1503 vectorizer will use the alignment information to guide the transformation
1504 (whether to generate regular loads/stores, or with special handling for
1505 misalignment). */
1507 bool
1508 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1510 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1511 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1512 enum dr_alignment_support supportable_dr_alignment;
1513 struct data_reference *dr0 = NULL, *first_store = NULL;
1514 struct data_reference *dr;
1515 unsigned int i, j;
1516 bool do_peeling = false;
1517 bool do_versioning = false;
1518 bool stat;
1519 gimple stmt;
1520 stmt_vec_info stmt_info;
1521 int vect_versioning_for_alias_required;
1522 unsigned int npeel = 0;
1523 bool all_misalignments_unknown = true;
1524 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1525 unsigned possible_npeel_number = 1;
1526 tree vectype;
1527 unsigned int nelements, mis, same_align_drs_max = 0;
1528 stmt_vector_for_cost body_cost_vec = NULL;
1530 if (vect_print_dump_info (REPORT_DETAILS))
1531 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1533 /* While cost model enhancements are expected in the future, the high level
1534 view of the code at this time is as follows:
1536 A) If there is a misaligned access then see if peeling to align
1537 this access can make all data references satisfy
1538 vect_supportable_dr_alignment. If so, update data structures
1539 as needed and return true.
1541 B) If peeling wasn't possible and there is a data reference with an
1542 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1543 then see if loop versioning checks can be used to make all data
1544 references satisfy vect_supportable_dr_alignment. If so, update
1545 data structures as needed and return true.
1547 C) If neither peeling nor versioning were successful then return false if
1548 any data reference does not satisfy vect_supportable_dr_alignment.
1550 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1552 Note, Possibility 3 above (which is peeling and versioning together) is not
1553 being done at this time. */
1555 /* (1) Peeling to force alignment. */
1557 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1558 Considerations:
1559 + How many accesses will become aligned due to the peeling
1560 - How many accesses will become unaligned due to the peeling,
1561 and the cost of misaligned accesses.
1562 - The cost of peeling (the extra runtime checks, the increase
1563 in code size). */
1565 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1567 stmt = DR_STMT (dr);
1568 stmt_info = vinfo_for_stmt (stmt);
1570 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1571 continue;
1573 /* For interleaving, only the alignment of the first access
1574 matters. */
1575 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1576 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1577 continue;
1579 /* FORNOW: Any strided load prevents peeling. The induction
1580 variable analysis will fail when the prologue loop is generated,
1581 and so we can't generate the new base for the pointer. */
1582 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1584 if (vect_print_dump_info (REPORT_DETAILS))
1585 fprintf (vect_dump, "strided load prevents peeling");
1586 do_peeling = false;
1587 break;
1590 /* For invariant accesses there is nothing to enhance. */
1591 if (integer_zerop (DR_STEP (dr)))
1592 continue;
1594 /* Strided loads perform only component accesses, alignment is
1595 irrelevant for them. */
1596 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1597 continue;
1599 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1600 do_peeling = vector_alignment_reachable_p (dr);
1601 if (do_peeling)
1603 if (known_alignment_for_access_p (dr))
1605 unsigned int npeel_tmp;
1606 bool negative = tree_int_cst_compare (DR_STEP (dr),
1607 size_zero_node) < 0;
1609 /* Save info about DR in the hash table. */
1610 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1611 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1612 htab_create (1, vect_peeling_hash,
1613 vect_peeling_hash_eq, free);
1615 vectype = STMT_VINFO_VECTYPE (stmt_info);
1616 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1617 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1618 TREE_TYPE (DR_REF (dr))));
1619 npeel_tmp = (negative
1620 ? (mis - nelements) : (nelements - mis))
1621 & (nelements - 1);
1623 /* For multiple types, it is possible that the bigger type access
1624 will have more than one peeling option. E.g., a loop with two
1625 types: one of size (vector size / 4), and the other one of
1626 size (vector size / 8). Vectorization factor will 8. If both
1627 access are misaligned by 3, the first one needs one scalar
1628 iteration to be aligned, and the second one needs 5. But the
1629 the first one will be aligned also by peeling 5 scalar
1630 iterations, and in that case both accesses will be aligned.
1631 Hence, except for the immediate peeling amount, we also want
1632 to try to add full vector size, while we don't exceed
1633 vectorization factor.
1634 We do this automtically for cost model, since we calculate cost
1635 for every peeling option. */
1636 if (!flag_vect_cost_model)
1637 possible_npeel_number = vf /nelements;
1639 /* Handle the aligned case. We may decide to align some other
1640 access, making DR unaligned. */
1641 if (DR_MISALIGNMENT (dr) == 0)
1643 npeel_tmp = 0;
1644 if (!flag_vect_cost_model)
1645 possible_npeel_number++;
1648 for (j = 0; j < possible_npeel_number; j++)
1650 gcc_assert (npeel_tmp <= vf);
1651 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1652 npeel_tmp += nelements;
1655 all_misalignments_unknown = false;
1656 /* Data-ref that was chosen for the case that all the
1657 misalignments are unknown is not relevant anymore, since we
1658 have a data-ref with known alignment. */
1659 dr0 = NULL;
1661 else
1663 /* If we don't know all the misalignment values, we prefer
1664 peeling for data-ref that has maximum number of data-refs
1665 with the same alignment, unless the target prefers to align
1666 stores over load. */
1667 if (all_misalignments_unknown)
1669 if (same_align_drs_max < VEC_length (dr_p,
1670 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1671 || !dr0)
1673 same_align_drs_max = VEC_length (dr_p,
1674 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1675 dr0 = dr;
1678 if (!first_store && DR_IS_WRITE (dr))
1679 first_store = dr;
1682 /* If there are both known and unknown misaligned accesses in the
1683 loop, we choose peeling amount according to the known
1684 accesses. */
1687 if (!supportable_dr_alignment)
1689 dr0 = dr;
1690 if (!first_store && DR_IS_WRITE (dr))
1691 first_store = dr;
1695 else
1697 if (!aligned_access_p (dr))
1699 if (vect_print_dump_info (REPORT_DETAILS))
1700 fprintf (vect_dump, "vector alignment may not be reachable");
1702 break;
1707 vect_versioning_for_alias_required
1708 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1710 /* Temporarily, if versioning for alias is required, we disable peeling
1711 until we support peeling and versioning. Often peeling for alignment
1712 will require peeling for loop-bound, which in turn requires that we
1713 know how to adjust the loop ivs after the loop. */
1714 if (vect_versioning_for_alias_required
1715 || !vect_can_advance_ivs_p (loop_vinfo)
1716 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1717 do_peeling = false;
1719 if (do_peeling && all_misalignments_unknown
1720 && vect_supportable_dr_alignment (dr0, false))
1723 /* Check if the target requires to prefer stores over loads, i.e., if
1724 misaligned stores are more expensive than misaligned loads (taking
1725 drs with same alignment into account). */
1726 if (first_store && DR_IS_READ (dr0))
1728 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1729 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1730 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1731 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1732 stmt_vector_for_cost dummy = VEC_alloc (stmt_info_for_cost, heap, 2);
1734 vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1735 &dummy);
1736 vect_get_data_access_cost (first_store, &store_inside_cost,
1737 &store_outside_cost, &dummy);
1739 VEC_free (stmt_info_for_cost, heap, dummy);
1741 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1742 aligning the load DR0). */
1743 load_inside_penalty = store_inside_cost;
1744 load_outside_penalty = store_outside_cost;
1745 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1746 (vinfo_for_stmt (DR_STMT (first_store))),
1747 i, dr);
1748 i++)
1749 if (DR_IS_READ (dr))
1751 load_inside_penalty += load_inside_cost;
1752 load_outside_penalty += load_outside_cost;
1754 else
1756 load_inside_penalty += store_inside_cost;
1757 load_outside_penalty += store_outside_cost;
1760 /* Calculate the penalty for leaving DR0 unaligned (by
1761 aligning the FIRST_STORE). */
1762 store_inside_penalty = load_inside_cost;
1763 store_outside_penalty = load_outside_cost;
1764 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1765 (vinfo_for_stmt (DR_STMT (dr0))),
1766 i, dr);
1767 i++)
1768 if (DR_IS_READ (dr))
1770 store_inside_penalty += load_inside_cost;
1771 store_outside_penalty += load_outside_cost;
1773 else
1775 store_inside_penalty += store_inside_cost;
1776 store_outside_penalty += store_outside_cost;
1779 if (load_inside_penalty > store_inside_penalty
1780 || (load_inside_penalty == store_inside_penalty
1781 && load_outside_penalty > store_outside_penalty))
1782 dr0 = first_store;
1785 /* In case there are only loads with different unknown misalignments, use
1786 peeling only if it may help to align other accesses in the loop. */
1787 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1788 (vinfo_for_stmt (DR_STMT (dr0))))
1789 && vect_supportable_dr_alignment (dr0, false)
1790 != dr_unaligned_supported)
1791 do_peeling = false;
1794 if (do_peeling && !dr0)
1796 /* Peeling is possible, but there is no data access that is not supported
1797 unless aligned. So we try to choose the best possible peeling. */
1799 /* We should get here only if there are drs with known misalignment. */
1800 gcc_assert (!all_misalignments_unknown);
1802 /* Choose the best peeling from the hash table. */
1803 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
1804 &body_cost_vec);
1805 if (!dr0 || !npeel)
1806 do_peeling = false;
1809 if (do_peeling)
1811 stmt = DR_STMT (dr0);
1812 stmt_info = vinfo_for_stmt (stmt);
1813 vectype = STMT_VINFO_VECTYPE (stmt_info);
1814 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1816 if (known_alignment_for_access_p (dr0))
1818 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1819 size_zero_node) < 0;
1820 if (!npeel)
1822 /* Since it's known at compile time, compute the number of
1823 iterations in the peeled loop (the peeling factor) for use in
1824 updating DR_MISALIGNMENT values. The peeling factor is the
1825 vectorization factor minus the misalignment as an element
1826 count. */
1827 mis = DR_MISALIGNMENT (dr0);
1828 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1829 npeel = ((negative ? mis - nelements : nelements - mis)
1830 & (nelements - 1));
1833 /* For interleaved data access every iteration accesses all the
1834 members of the group, therefore we divide the number of iterations
1835 by the group size. */
1836 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1837 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1838 npeel /= GROUP_SIZE (stmt_info);
1840 if (vect_print_dump_info (REPORT_DETAILS))
1841 fprintf (vect_dump, "Try peeling by %d", npeel);
1844 /* Ensure that all data refs can be vectorized after the peel. */
1845 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1847 int save_misalignment;
1849 if (dr == dr0)
1850 continue;
1852 stmt = DR_STMT (dr);
1853 stmt_info = vinfo_for_stmt (stmt);
1854 /* For interleaving, only the alignment of the first access
1855 matters. */
1856 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1857 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1858 continue;
1860 /* Strided loads perform only component accesses, alignment is
1861 irrelevant for them. */
1862 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1863 continue;
1865 save_misalignment = DR_MISALIGNMENT (dr);
1866 vect_update_misalignment_for_peel (dr, dr0, npeel);
1867 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1868 SET_DR_MISALIGNMENT (dr, save_misalignment);
1870 if (!supportable_dr_alignment)
1872 do_peeling = false;
1873 break;
1877 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1879 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1880 if (!stat)
1881 do_peeling = false;
1882 else
1883 return stat;
1886 if (do_peeling)
1888 stmt_info_for_cost *si;
1889 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
1891 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1892 If the misalignment of DR_i is identical to that of dr0 then set
1893 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1894 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1895 by the peeling factor times the element size of DR_i (MOD the
1896 vectorization factor times the size). Otherwise, the
1897 misalignment of DR_i must be set to unknown. */
1898 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1899 if (dr != dr0)
1900 vect_update_misalignment_for_peel (dr, dr0, npeel);
1902 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1903 if (npeel)
1904 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1905 else
1906 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1907 SET_DR_MISALIGNMENT (dr0, 0);
1908 if (vect_print_dump_info (REPORT_ALIGNMENT))
1909 fprintf (vect_dump, "Alignment of access forced using peeling.");
1911 if (vect_print_dump_info (REPORT_DETAILS))
1912 fprintf (vect_dump, "Peeling for alignment will be applied.");
1914 /* We've delayed passing the inside-loop peeling costs to the
1915 target cost model until we were sure peeling would happen.
1916 Do so now. */
1917 if (body_cost_vec)
1919 FOR_EACH_VEC_ELT (stmt_info_for_cost, body_cost_vec, i, si)
1921 struct _stmt_vec_info *stmt_info
1922 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1923 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1924 si->misalign, vect_body);
1926 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1929 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1930 gcc_assert (stat);
1931 return stat;
1936 /* (2) Versioning to force alignment. */
1938 /* Try versioning if:
1939 1) flag_tree_vect_loop_version is TRUE
1940 2) optimize loop for speed
1941 3) there is at least one unsupported misaligned data ref with an unknown
1942 misalignment, and
1943 4) all misaligned data refs with a known misalignment are supported, and
1944 5) the number of runtime alignment checks is within reason. */
1946 do_versioning =
1947 flag_tree_vect_loop_version
1948 && optimize_loop_nest_for_speed_p (loop)
1949 && (!loop->inner); /* FORNOW */
1951 if (do_versioning)
1953 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1955 stmt = DR_STMT (dr);
1956 stmt_info = vinfo_for_stmt (stmt);
1958 /* For interleaving, only the alignment of the first access
1959 matters. */
1960 if (aligned_access_p (dr)
1961 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1962 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1963 continue;
1965 /* Strided loads perform only component accesses, alignment is
1966 irrelevant for them. */
1967 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1968 continue;
1970 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1972 if (!supportable_dr_alignment)
1974 gimple stmt;
1975 int mask;
1976 tree vectype;
1978 if (known_alignment_for_access_p (dr)
1979 || VEC_length (gimple,
1980 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1981 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1983 do_versioning = false;
1984 break;
1987 stmt = DR_STMT (dr);
1988 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1989 gcc_assert (vectype);
1991 /* The rightmost bits of an aligned address must be zeros.
1992 Construct the mask needed for this test. For example,
1993 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1994 mask must be 15 = 0xf. */
1995 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1997 /* FORNOW: use the same mask to test all potentially unaligned
1998 references in the loop. The vectorizer currently supports
1999 a single vector size, see the reference to
2000 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2001 vectorization factor is computed. */
2002 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2003 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2004 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2005 VEC_safe_push (gimple, heap,
2006 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2007 DR_STMT (dr));
2011 /* Versioning requires at least one misaligned data reference. */
2012 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2013 do_versioning = false;
2014 else if (!do_versioning)
2015 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2018 if (do_versioning)
2020 VEC(gimple,heap) *may_misalign_stmts
2021 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2022 gimple stmt;
2024 /* It can now be assumed that the data references in the statements
2025 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2026 of the loop being vectorized. */
2027 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
2029 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2030 dr = STMT_VINFO_DATA_REF (stmt_info);
2031 SET_DR_MISALIGNMENT (dr, 0);
2032 if (vect_print_dump_info (REPORT_ALIGNMENT))
2033 fprintf (vect_dump, "Alignment of access forced using versioning.");
2036 if (vect_print_dump_info (REPORT_DETAILS))
2037 fprintf (vect_dump, "Versioning for alignment will be applied.");
2039 /* Peeling and versioning can't be done together at this time. */
2040 gcc_assert (! (do_peeling && do_versioning));
2042 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2043 gcc_assert (stat);
2044 return stat;
2047 /* This point is reached if neither peeling nor versioning is being done. */
2048 gcc_assert (! (do_peeling || do_versioning));
2050 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2051 return stat;
2055 /* Function vect_find_same_alignment_drs.
2057 Update group and alignment relations according to the chosen
2058 vectorization factor. */
2060 static void
2061 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
2062 loop_vec_info loop_vinfo)
2064 unsigned int i;
2065 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2066 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2067 struct data_reference *dra = DDR_A (ddr);
2068 struct data_reference *drb = DDR_B (ddr);
2069 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2070 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2071 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
2072 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
2073 lambda_vector dist_v;
2074 unsigned int loop_depth;
2076 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2077 return;
2079 if (dra == drb)
2080 return;
2082 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2083 return;
2085 /* Loop-based vectorization and known data dependence. */
2086 if (DDR_NUM_DIST_VECTS (ddr) == 0)
2087 return;
2089 /* Data-dependence analysis reports a distance vector of zero
2090 for data-references that overlap only in the first iteration
2091 but have different sign step (see PR45764).
2092 So as a sanity check require equal DR_STEP. */
2093 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2094 return;
2096 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
2097 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
2099 int dist = dist_v[loop_depth];
2101 if (vect_print_dump_info (REPORT_DR_DETAILS))
2102 fprintf (vect_dump, "dependence distance = %d.", dist);
2104 /* Same loop iteration. */
2105 if (dist == 0
2106 || (dist % vectorization_factor == 0 && dra_size == drb_size))
2108 /* Two references with distance zero have the same alignment. */
2109 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
2110 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
2111 if (vect_print_dump_info (REPORT_ALIGNMENT))
2112 fprintf (vect_dump, "accesses have the same alignment.");
2113 if (vect_print_dump_info (REPORT_DR_DETAILS))
2115 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
2116 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
2117 fprintf (vect_dump, " and ");
2118 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
2125 /* Function vect_analyze_data_refs_alignment
2127 Analyze the alignment of the data-references in the loop.
2128 Return FALSE if a data reference is found that cannot be vectorized. */
2130 bool
2131 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2132 bb_vec_info bb_vinfo)
2134 if (vect_print_dump_info (REPORT_DETAILS))
2135 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2137 /* Mark groups of data references with same alignment using
2138 data dependence information. */
2139 if (loop_vinfo)
2141 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2142 struct data_dependence_relation *ddr;
2143 unsigned int i;
2145 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2146 vect_find_same_alignment_drs (ddr, loop_vinfo);
2149 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2151 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2152 fprintf (vect_dump,
2153 "not vectorized: can't calculate alignment for data ref.");
2154 return false;
2157 return true;
2161 /* Analyze groups of accesses: check that DR belongs to a group of
2162 accesses of legal size, step, etc. Detect gaps, single element
2163 interleaving, and other special cases. Set grouped access info.
2164 Collect groups of strided stores for further use in SLP analysis. */
2166 static bool
2167 vect_analyze_group_access (struct data_reference *dr)
2169 tree step = DR_STEP (dr);
2170 tree scalar_type = TREE_TYPE (DR_REF (dr));
2171 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2172 gimple stmt = DR_STMT (dr);
2173 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2174 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2175 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2176 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2177 HOST_WIDE_INT groupsize, last_accessed_element = 1;
2178 bool slp_impossible = false;
2179 struct loop *loop = NULL;
2181 if (loop_vinfo)
2182 loop = LOOP_VINFO_LOOP (loop_vinfo);
2184 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2185 size of the interleaving group (including gaps). */
2186 groupsize = dr_step / type_size;
2188 /* Not consecutive access is possible only if it is a part of interleaving. */
2189 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2191 /* Check if it this DR is a part of interleaving, and is a single
2192 element of the group that is accessed in the loop. */
2194 /* Gaps are supported only for loads. STEP must be a multiple of the type
2195 size. The size of the group must be a power of 2. */
2196 if (DR_IS_READ (dr)
2197 && (dr_step % type_size) == 0
2198 && groupsize > 0
2199 && exact_log2 (groupsize) != -1)
2201 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2202 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2203 if (vect_print_dump_info (REPORT_DR_DETAILS))
2205 fprintf (vect_dump, "Detected single element interleaving ");
2206 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2207 fprintf (vect_dump, " step ");
2208 print_generic_expr (vect_dump, step, TDF_SLIM);
2211 if (loop_vinfo)
2213 if (vect_print_dump_info (REPORT_DETAILS))
2214 fprintf (vect_dump, "Data access with gaps requires scalar "
2215 "epilogue loop");
2216 if (loop->inner)
2218 if (vect_print_dump_info (REPORT_DETAILS))
2219 fprintf (vect_dump, "Peeling for outer loop is not"
2220 " supported");
2221 return false;
2224 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2227 return true;
2230 if (vect_print_dump_info (REPORT_DETAILS))
2232 fprintf (vect_dump, "not consecutive access ");
2233 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2236 if (bb_vinfo)
2238 /* Mark the statement as unvectorizable. */
2239 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2240 return true;
2243 return false;
2246 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2248 /* First stmt in the interleaving chain. Check the chain. */
2249 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2250 struct data_reference *data_ref = dr;
2251 unsigned int count = 1;
2252 tree next_step;
2253 tree prev_init = DR_INIT (data_ref);
2254 gimple prev = stmt;
2255 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2257 while (next)
2259 /* Skip same data-refs. In case that two or more stmts share
2260 data-ref (supported only for loads), we vectorize only the first
2261 stmt, and the rest get their vectorized loads from the first
2262 one. */
2263 if (!tree_int_cst_compare (DR_INIT (data_ref),
2264 DR_INIT (STMT_VINFO_DATA_REF (
2265 vinfo_for_stmt (next)))))
2267 if (DR_IS_WRITE (data_ref))
2269 if (vect_print_dump_info (REPORT_DETAILS))
2270 fprintf (vect_dump, "Two store stmts share the same dr.");
2271 return false;
2274 /* Check that there is no load-store dependencies for this loads
2275 to prevent a case of load-store-load to the same location. */
2276 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2277 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2279 if (vect_print_dump_info (REPORT_DETAILS))
2280 fprintf (vect_dump,
2281 "READ_WRITE dependence in interleaving.");
2282 return false;
2285 /* For load use the same data-ref load. */
2286 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2288 prev = next;
2289 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2290 continue;
2293 prev = next;
2295 /* Check that all the accesses have the same STEP. */
2296 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2297 if (tree_int_cst_compare (step, next_step))
2299 if (vect_print_dump_info (REPORT_DETAILS))
2300 fprintf (vect_dump, "not consecutive access in interleaving");
2301 return false;
2304 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2305 /* Check that the distance between two accesses is equal to the type
2306 size. Otherwise, we have gaps. */
2307 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2308 - TREE_INT_CST_LOW (prev_init)) / type_size;
2309 if (diff != 1)
2311 /* FORNOW: SLP of accesses with gaps is not supported. */
2312 slp_impossible = true;
2313 if (DR_IS_WRITE (data_ref))
2315 if (vect_print_dump_info (REPORT_DETAILS))
2316 fprintf (vect_dump, "interleaved store with gaps");
2317 return false;
2320 gaps += diff - 1;
2323 last_accessed_element += diff;
2325 /* Store the gap from the previous member of the group. If there is no
2326 gap in the access, GROUP_GAP is always 1. */
2327 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2329 prev_init = DR_INIT (data_ref);
2330 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2331 /* Count the number of data-refs in the chain. */
2332 count++;
2335 /* COUNT is the number of accesses found, we multiply it by the size of
2336 the type to get COUNT_IN_BYTES. */
2337 count_in_bytes = type_size * count;
2339 /* Check that the size of the interleaving (including gaps) is not
2340 greater than STEP. */
2341 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2343 if (vect_print_dump_info (REPORT_DETAILS))
2345 fprintf (vect_dump, "interleaving size is greater than step for ");
2346 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2348 return false;
2351 /* Check that the size of the interleaving is equal to STEP for stores,
2352 i.e., that there are no gaps. */
2353 if (dr_step && dr_step != count_in_bytes)
2355 if (DR_IS_READ (dr))
2357 slp_impossible = true;
2358 /* There is a gap after the last load in the group. This gap is a
2359 difference between the groupsize and the number of elements.
2360 When there is no gap, this difference should be 0. */
2361 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
2363 else
2365 if (vect_print_dump_info (REPORT_DETAILS))
2366 fprintf (vect_dump, "interleaved store with gaps");
2367 return false;
2371 /* Check that STEP is a multiple of type size. */
2372 if (dr_step && (dr_step % type_size) != 0)
2374 if (vect_print_dump_info (REPORT_DETAILS))
2376 fprintf (vect_dump, "step is not a multiple of type size: step ");
2377 print_generic_expr (vect_dump, step, TDF_SLIM);
2378 fprintf (vect_dump, " size ");
2379 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2380 TDF_SLIM);
2382 return false;
2385 if (groupsize == 0)
2386 groupsize = count;
2388 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2389 if (vect_print_dump_info (REPORT_DETAILS))
2390 fprintf (vect_dump, "Detected interleaving of size %d", (int)groupsize);
2392 /* SLP: create an SLP data structure for every interleaving group of
2393 stores for further analysis in vect_analyse_slp. */
2394 if (DR_IS_WRITE (dr) && !slp_impossible)
2396 if (loop_vinfo)
2397 VEC_safe_push (gimple, heap, LOOP_VINFO_GROUPED_STORES (loop_vinfo),
2398 stmt);
2399 if (bb_vinfo)
2400 VEC_safe_push (gimple, heap, BB_VINFO_GROUPED_STORES (bb_vinfo),
2401 stmt);
2404 /* There is a gap in the end of the group. */
2405 if (groupsize - last_accessed_element > 0 && loop_vinfo)
2407 if (vect_print_dump_info (REPORT_DETAILS))
2408 fprintf (vect_dump, "Data access with gaps requires scalar "
2409 "epilogue loop");
2410 if (loop->inner)
2412 if (vect_print_dump_info (REPORT_DETAILS))
2413 fprintf (vect_dump, "Peeling for outer loop is not supported");
2414 return false;
2417 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2421 return true;
2425 /* Analyze the access pattern of the data-reference DR.
2426 In case of non-consecutive accesses call vect_analyze_group_access() to
2427 analyze groups of accesses. */
2429 static bool
2430 vect_analyze_data_ref_access (struct data_reference *dr)
2432 tree step = DR_STEP (dr);
2433 tree scalar_type = TREE_TYPE (DR_REF (dr));
2434 gimple stmt = DR_STMT (dr);
2435 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2436 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2437 struct loop *loop = NULL;
2439 if (loop_vinfo)
2440 loop = LOOP_VINFO_LOOP (loop_vinfo);
2442 if (loop_vinfo && !step)
2444 if (vect_print_dump_info (REPORT_DETAILS))
2445 fprintf (vect_dump, "bad data-ref access in loop");
2446 return false;
2449 /* Allow invariant loads in loops. */
2450 if (loop_vinfo && integer_zerop (step))
2452 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2453 return DR_IS_READ (dr);
2456 if (loop && nested_in_vect_loop_p (loop, stmt))
2458 /* Interleaved accesses are not yet supported within outer-loop
2459 vectorization for references in the inner-loop. */
2460 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2462 /* For the rest of the analysis we use the outer-loop step. */
2463 step = STMT_VINFO_DR_STEP (stmt_info);
2464 if (integer_zerop (step))
2466 if (vect_print_dump_info (REPORT_ALIGNMENT))
2467 fprintf (vect_dump, "zero step in outer loop.");
2468 if (DR_IS_READ (dr))
2469 return true;
2470 else
2471 return false;
2475 /* Consecutive? */
2476 if (TREE_CODE (step) == INTEGER_CST)
2478 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2479 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2480 || (dr_step < 0
2481 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2483 /* Mark that it is not interleaving. */
2484 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2485 return true;
2489 if (loop && nested_in_vect_loop_p (loop, stmt))
2491 if (vect_print_dump_info (REPORT_ALIGNMENT))
2492 fprintf (vect_dump, "grouped access in outer loop.");
2493 return false;
2496 /* Assume this is a DR handled by non-constant strided load case. */
2497 if (TREE_CODE (step) != INTEGER_CST)
2498 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2500 /* Not consecutive access - check if it's a part of interleaving group. */
2501 return vect_analyze_group_access (dr);
2505 /* Function vect_analyze_data_ref_accesses.
2507 Analyze the access pattern of all the data references in the loop.
2509 FORNOW: the only access pattern that is considered vectorizable is a
2510 simple step 1 (consecutive) access.
2512 FORNOW: handle only arrays and pointer accesses. */
2514 bool
2515 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2517 unsigned int i;
2518 VEC (data_reference_p, heap) *datarefs;
2519 struct data_reference *dr;
2521 if (vect_print_dump_info (REPORT_DETAILS))
2522 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2524 if (loop_vinfo)
2525 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2526 else
2527 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2529 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2530 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2531 && !vect_analyze_data_ref_access (dr))
2533 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2534 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2536 if (bb_vinfo)
2538 /* Mark the statement as not vectorizable. */
2539 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2540 continue;
2542 else
2543 return false;
2546 return true;
2549 /* Function vect_prune_runtime_alias_test_list.
2551 Prune a list of ddrs to be tested at run-time by versioning for alias.
2552 Return FALSE if resulting list of ddrs is longer then allowed by
2553 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2555 bool
2556 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2558 VEC (ddr_p, heap) * ddrs =
2559 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2560 unsigned i, j;
2562 if (vect_print_dump_info (REPORT_DETAILS))
2563 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2565 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2567 bool found;
2568 ddr_p ddr_i;
2570 ddr_i = VEC_index (ddr_p, ddrs, i);
2571 found = false;
2573 for (j = 0; j < i; j++)
2575 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2577 if (vect_vfa_range_equal (ddr_i, ddr_j))
2579 if (vect_print_dump_info (REPORT_DR_DETAILS))
2581 fprintf (vect_dump, "found equal ranges ");
2582 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2583 fprintf (vect_dump, ", ");
2584 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2585 fprintf (vect_dump, " and ");
2586 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2587 fprintf (vect_dump, ", ");
2588 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2590 found = true;
2591 break;
2595 if (found)
2597 VEC_ordered_remove (ddr_p, ddrs, i);
2598 continue;
2600 i++;
2603 if (VEC_length (ddr_p, ddrs) >
2604 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2606 if (vect_print_dump_info (REPORT_DR_DETAILS))
2608 fprintf (vect_dump,
2609 "disable versioning for alias - max number of generated "
2610 "checks exceeded.");
2613 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2615 return false;
2618 return true;
2621 /* Check whether a non-affine read in stmt is suitable for gather load
2622 and if so, return a builtin decl for that operation. */
2624 tree
2625 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2626 tree *offp, int *scalep)
2628 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2629 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2631 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2632 tree offtype = NULL_TREE;
2633 tree decl, base, off;
2634 enum machine_mode pmode;
2635 int punsignedp, pvolatilep;
2637 /* The gather builtins need address of the form
2638 loop_invariant + vector * {1, 2, 4, 8}
2640 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2641 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2642 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2643 multiplications and additions in it. To get a vector, we need
2644 a single SSA_NAME that will be defined in the loop and will
2645 contain everything that is not loop invariant and that can be
2646 vectorized. The following code attempts to find such a preexistng
2647 SSA_NAME OFF and put the loop invariants into a tree BASE
2648 that can be gimplified before the loop. */
2649 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2650 &pmode, &punsignedp, &pvolatilep, false);
2651 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2653 if (TREE_CODE (base) == MEM_REF)
2655 if (!integer_zerop (TREE_OPERAND (base, 1)))
2657 if (off == NULL_TREE)
2659 double_int moff = mem_ref_offset (base);
2660 off = double_int_to_tree (sizetype, moff);
2662 else
2663 off = size_binop (PLUS_EXPR, off,
2664 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2666 base = TREE_OPERAND (base, 0);
2668 else
2669 base = build_fold_addr_expr (base);
2671 if (off == NULL_TREE)
2672 off = size_zero_node;
2674 /* If base is not loop invariant, either off is 0, then we start with just
2675 the constant offset in the loop invariant BASE and continue with base
2676 as OFF, otherwise give up.
2677 We could handle that case by gimplifying the addition of base + off
2678 into some SSA_NAME and use that as off, but for now punt. */
2679 if (!expr_invariant_in_loop_p (loop, base))
2681 if (!integer_zerop (off))
2682 return NULL_TREE;
2683 off = base;
2684 base = size_int (pbitpos / BITS_PER_UNIT);
2686 /* Otherwise put base + constant offset into the loop invariant BASE
2687 and continue with OFF. */
2688 else
2690 base = fold_convert (sizetype, base);
2691 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2694 /* OFF at this point may be either a SSA_NAME or some tree expression
2695 from get_inner_reference. Try to peel off loop invariants from it
2696 into BASE as long as possible. */
2697 STRIP_NOPS (off);
2698 while (offtype == NULL_TREE)
2700 enum tree_code code;
2701 tree op0, op1, add = NULL_TREE;
2703 if (TREE_CODE (off) == SSA_NAME)
2705 gimple def_stmt = SSA_NAME_DEF_STMT (off);
2707 if (expr_invariant_in_loop_p (loop, off))
2708 return NULL_TREE;
2710 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2711 break;
2713 op0 = gimple_assign_rhs1 (def_stmt);
2714 code = gimple_assign_rhs_code (def_stmt);
2715 op1 = gimple_assign_rhs2 (def_stmt);
2717 else
2719 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2720 return NULL_TREE;
2721 code = TREE_CODE (off);
2722 extract_ops_from_tree (off, &code, &op0, &op1);
2724 switch (code)
2726 case POINTER_PLUS_EXPR:
2727 case PLUS_EXPR:
2728 if (expr_invariant_in_loop_p (loop, op0))
2730 add = op0;
2731 off = op1;
2732 do_add:
2733 add = fold_convert (sizetype, add);
2734 if (scale != 1)
2735 add = size_binop (MULT_EXPR, add, size_int (scale));
2736 base = size_binop (PLUS_EXPR, base, add);
2737 continue;
2739 if (expr_invariant_in_loop_p (loop, op1))
2741 add = op1;
2742 off = op0;
2743 goto do_add;
2745 break;
2746 case MINUS_EXPR:
2747 if (expr_invariant_in_loop_p (loop, op1))
2749 add = fold_convert (sizetype, op1);
2750 add = size_binop (MINUS_EXPR, size_zero_node, add);
2751 off = op0;
2752 goto do_add;
2754 break;
2755 case MULT_EXPR:
2756 if (scale == 1 && host_integerp (op1, 0))
2758 scale = tree_low_cst (op1, 0);
2759 off = op0;
2760 continue;
2762 break;
2763 case SSA_NAME:
2764 off = op0;
2765 continue;
2766 CASE_CONVERT:
2767 if (!POINTER_TYPE_P (TREE_TYPE (op0))
2768 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2769 break;
2770 if (TYPE_PRECISION (TREE_TYPE (op0))
2771 == TYPE_PRECISION (TREE_TYPE (off)))
2773 off = op0;
2774 continue;
2776 if (TYPE_PRECISION (TREE_TYPE (op0))
2777 < TYPE_PRECISION (TREE_TYPE (off)))
2779 off = op0;
2780 offtype = TREE_TYPE (off);
2781 STRIP_NOPS (off);
2782 continue;
2784 break;
2785 default:
2786 break;
2788 break;
2791 /* If at the end OFF still isn't a SSA_NAME or isn't
2792 defined in the loop, punt. */
2793 if (TREE_CODE (off) != SSA_NAME
2794 || expr_invariant_in_loop_p (loop, off))
2795 return NULL_TREE;
2797 if (offtype == NULL_TREE)
2798 offtype = TREE_TYPE (off);
2800 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2801 offtype, scale);
2802 if (decl == NULL_TREE)
2803 return NULL_TREE;
2805 if (basep)
2806 *basep = base;
2807 if (offp)
2808 *offp = off;
2809 if (scalep)
2810 *scalep = scale;
2811 return decl;
2814 /* Check wether a non-affine load in STMT (being in the loop referred to
2815 in LOOP_VINFO) is suitable for handling as strided load. That is the case
2816 if its address is a simple induction variable. If so return the base
2817 of that induction variable in *BASEP and the (loop-invariant) step
2818 in *STEPP, both only when that pointer is non-zero.
2820 This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
2821 base pointer) only. */
2823 bool
2824 vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2825 tree *stepp)
2827 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2828 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2829 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2830 tree base, off;
2831 affine_iv iv;
2833 if (!DR_IS_READ (dr))
2834 return false;
2836 base = DR_REF (dr);
2838 if (TREE_CODE (base) == ARRAY_REF)
2840 off = TREE_OPERAND (base, 1);
2841 base = TREE_OPERAND (base, 0);
2843 else if (TREE_CODE (base) == MEM_REF)
2845 off = TREE_OPERAND (base, 0);
2846 base = TREE_OPERAND (base, 1);
2848 else
2849 return false;
2851 if (TREE_CODE (off) != SSA_NAME)
2852 return false;
2854 if (!expr_invariant_in_loop_p (loop, base)
2855 || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
2856 return false;
2858 if (basep)
2859 *basep = iv.base;
2860 if (stepp)
2861 *stepp = iv.step;
2862 return true;
2865 /* Function vect_analyze_data_refs.
2867 Find all the data references in the loop or basic block.
2869 The general structure of the analysis of data refs in the vectorizer is as
2870 follows:
2871 1- vect_analyze_data_refs(loop/bb): call
2872 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2873 in the loop/bb and their dependences.
2874 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2875 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2876 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2880 bool
2881 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2882 bb_vec_info bb_vinfo,
2883 int *min_vf)
2885 struct loop *loop = NULL;
2886 basic_block bb = NULL;
2887 unsigned int i;
2888 VEC (data_reference_p, heap) *datarefs;
2889 struct data_reference *dr;
2890 tree scalar_type;
2891 bool res, stop_bb_analysis = false;
2893 if (vect_print_dump_info (REPORT_DETAILS))
2894 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2896 if (loop_vinfo)
2898 loop = LOOP_VINFO_LOOP (loop_vinfo);
2899 res = compute_data_dependences_for_loop
2900 (loop, true,
2901 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2902 &LOOP_VINFO_DATAREFS (loop_vinfo),
2903 &LOOP_VINFO_DDRS (loop_vinfo));
2905 if (!res)
2907 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2908 fprintf (vect_dump, "not vectorized: loop contains function calls"
2909 " or data references that cannot be analyzed");
2910 return false;
2913 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2915 else
2917 gimple_stmt_iterator gsi;
2919 bb = BB_VINFO_BB (bb_vinfo);
2920 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2922 gimple stmt = gsi_stmt (gsi);
2923 if (!find_data_references_in_stmt (NULL, stmt,
2924 &BB_VINFO_DATAREFS (bb_vinfo)))
2926 /* Mark the rest of the basic-block as unvectorizable. */
2927 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2929 stmt = gsi_stmt (gsi);
2930 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
2932 break;
2935 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
2936 &BB_VINFO_DDRS (bb_vinfo), NULL, true))
2938 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2939 fprintf (vect_dump, "not vectorized: basic block contains function"
2940 " calls or data references that cannot be analyzed");
2941 return false;
2944 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2947 /* Go through the data-refs, check that the analysis succeeded. Update
2948 pointer from stmt_vec_info struct to DR and vectype. */
2950 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2952 gimple stmt;
2953 stmt_vec_info stmt_info;
2954 tree base, offset, init;
2955 bool gather = false;
2956 int vf;
2958 if (!dr || !DR_REF (dr))
2960 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2961 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2963 return false;
2966 stmt = DR_STMT (dr);
2967 stmt_info = vinfo_for_stmt (stmt);
2969 if (stop_bb_analysis)
2971 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2972 continue;
2975 /* Check that analysis of the data-ref succeeded. */
2976 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2977 || !DR_STEP (dr))
2979 /* If target supports vector gather loads, see if they can't
2980 be used. */
2981 if (loop_vinfo
2982 && DR_IS_READ (dr)
2983 && !TREE_THIS_VOLATILE (DR_REF (dr))
2984 && targetm.vectorize.builtin_gather != NULL
2985 && !nested_in_vect_loop_p (loop, stmt))
2987 struct data_reference *newdr
2988 = create_data_ref (NULL, loop_containing_stmt (stmt),
2989 DR_REF (dr), stmt, true);
2990 gcc_assert (newdr != NULL && DR_REF (newdr));
2991 if (DR_BASE_ADDRESS (newdr)
2992 && DR_OFFSET (newdr)
2993 && DR_INIT (newdr)
2994 && DR_STEP (newdr)
2995 && integer_zerop (DR_STEP (newdr)))
2997 dr = newdr;
2998 gather = true;
3000 else
3001 free_data_ref (newdr);
3004 if (!gather)
3006 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3008 fprintf (vect_dump, "not vectorized: data ref analysis "
3009 "failed ");
3010 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3013 if (bb_vinfo)
3015 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3016 stop_bb_analysis = true;
3017 continue;
3020 return false;
3024 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3026 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3027 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3028 "constant");
3030 if (bb_vinfo)
3032 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3033 stop_bb_analysis = true;
3034 continue;
3037 if (gather)
3038 free_data_ref (dr);
3039 return false;
3042 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3044 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3046 fprintf (vect_dump, "not vectorized: volatile type ");
3047 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3050 if (bb_vinfo)
3052 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3053 stop_bb_analysis = true;
3054 continue;
3057 return false;
3060 if (stmt_can_throw_internal (stmt))
3062 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3064 fprintf (vect_dump, "not vectorized: statement can throw an "
3065 "exception ");
3066 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3069 if (bb_vinfo)
3071 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3072 stop_bb_analysis = true;
3073 continue;
3076 if (gather)
3077 free_data_ref (dr);
3078 return false;
3081 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3082 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3084 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3086 fprintf (vect_dump, "not vectorized: statement is bitfield "
3087 "access ");
3088 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3091 if (bb_vinfo)
3093 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3094 stop_bb_analysis = true;
3095 continue;
3098 if (gather)
3099 free_data_ref (dr);
3100 return false;
3103 base = unshare_expr (DR_BASE_ADDRESS (dr));
3104 offset = unshare_expr (DR_OFFSET (dr));
3105 init = unshare_expr (DR_INIT (dr));
3107 if (is_gimple_call (stmt))
3109 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3111 fprintf (vect_dump, "not vectorized: dr in a call ");
3112 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3115 if (bb_vinfo)
3117 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3118 stop_bb_analysis = true;
3119 continue;
3122 if (gather)
3123 free_data_ref (dr);
3124 return false;
3127 /* Update DR field in stmt_vec_info struct. */
3129 /* If the dataref is in an inner-loop of the loop that is considered for
3130 for vectorization, we also want to analyze the access relative to
3131 the outer-loop (DR contains information only relative to the
3132 inner-most enclosing loop). We do that by building a reference to the
3133 first location accessed by the inner-loop, and analyze it relative to
3134 the outer-loop. */
3135 if (loop && nested_in_vect_loop_p (loop, stmt))
3137 tree outer_step, outer_base, outer_init;
3138 HOST_WIDE_INT pbitsize, pbitpos;
3139 tree poffset;
3140 enum machine_mode pmode;
3141 int punsignedp, pvolatilep;
3142 affine_iv base_iv, offset_iv;
3143 tree dinit;
3145 /* Build a reference to the first location accessed by the
3146 inner-loop: *(BASE+INIT). (The first location is actually
3147 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3148 tree inner_base = build_fold_indirect_ref
3149 (fold_build_pointer_plus (base, init));
3151 if (vect_print_dump_info (REPORT_DETAILS))
3153 fprintf (vect_dump, "analyze in outer-loop: ");
3154 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
3157 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3158 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3159 gcc_assert (outer_base != NULL_TREE);
3161 if (pbitpos % BITS_PER_UNIT != 0)
3163 if (vect_print_dump_info (REPORT_DETAILS))
3164 fprintf (vect_dump, "failed: bit offset alignment.\n");
3165 return false;
3168 outer_base = build_fold_addr_expr (outer_base);
3169 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3170 &base_iv, false))
3172 if (vect_print_dump_info (REPORT_DETAILS))
3173 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
3174 return false;
3177 if (offset)
3179 if (poffset)
3180 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
3181 poffset);
3182 else
3183 poffset = offset;
3186 if (!poffset)
3188 offset_iv.base = ssize_int (0);
3189 offset_iv.step = ssize_int (0);
3191 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3192 &offset_iv, false))
3194 if (vect_print_dump_info (REPORT_DETAILS))
3195 fprintf (vect_dump, "evolution of offset is not affine.\n");
3196 return false;
3199 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3200 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3201 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3202 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3203 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3205 outer_step = size_binop (PLUS_EXPR,
3206 fold_convert (ssizetype, base_iv.step),
3207 fold_convert (ssizetype, offset_iv.step));
3209 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3210 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3211 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3212 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3213 STMT_VINFO_DR_OFFSET (stmt_info) =
3214 fold_convert (ssizetype, offset_iv.base);
3215 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3216 size_int (highest_pow2_factor (offset_iv.base));
3218 if (vect_print_dump_info (REPORT_DETAILS))
3220 fprintf (vect_dump, "\touter base_address: ");
3221 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3222 fprintf (vect_dump, "\n\touter offset from base address: ");
3223 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3224 fprintf (vect_dump, "\n\touter constant offset from base address: ");
3225 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3226 fprintf (vect_dump, "\n\touter step: ");
3227 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3228 fprintf (vect_dump, "\n\touter aligned to: ");
3229 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3233 if (STMT_VINFO_DATA_REF (stmt_info))
3235 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3237 fprintf (vect_dump,
3238 "not vectorized: more than one data ref in stmt: ");
3239 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3242 if (bb_vinfo)
3244 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3245 stop_bb_analysis = true;
3246 continue;
3249 if (gather)
3250 free_data_ref (dr);
3251 return false;
3254 STMT_VINFO_DATA_REF (stmt_info) = dr;
3256 /* Set vectype for STMT. */
3257 scalar_type = TREE_TYPE (DR_REF (dr));
3258 STMT_VINFO_VECTYPE (stmt_info) =
3259 get_vectype_for_scalar_type (scalar_type);
3260 if (!STMT_VINFO_VECTYPE (stmt_info))
3262 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3264 fprintf (vect_dump,
3265 "not vectorized: no vectype for stmt: ");
3266 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3267 fprintf (vect_dump, " scalar_type: ");
3268 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3271 if (bb_vinfo)
3273 /* Mark the statement as not vectorizable. */
3274 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3275 stop_bb_analysis = true;
3276 continue;
3279 if (gather)
3281 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3282 free_data_ref (dr);
3284 return false;
3287 /* Adjust the minimal vectorization factor according to the
3288 vector type. */
3289 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3290 if (vf > *min_vf)
3291 *min_vf = vf;
3293 if (gather)
3295 unsigned int j, k, n;
3296 struct data_reference *olddr
3297 = VEC_index (data_reference_p, datarefs, i);
3298 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3299 struct data_dependence_relation *ddr, *newddr;
3300 bool bad = false;
3301 tree off;
3302 VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3304 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3305 if (gather
3306 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3307 gather = false;
3308 if (!gather)
3310 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3312 fprintf (vect_dump,
3313 "not vectorized: not suitable for gather load ");
3314 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3316 return false;
3319 n = VEC_length (data_reference_p, datarefs) - 1;
3320 for (j = 0, k = i - 1; j < i; j++)
3322 ddr = VEC_index (ddr_p, ddrs, k);
3323 gcc_assert (DDR_B (ddr) == olddr);
3324 newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3325 nest);
3326 VEC_replace (ddr_p, ddrs, k, newddr);
3327 free_dependence_relation (ddr);
3328 if (!bad
3329 && DR_IS_WRITE (DDR_A (newddr))
3330 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3331 bad = true;
3332 k += --n;
3335 k++;
3336 n = k + VEC_length (data_reference_p, datarefs) - i - 1;
3337 for (; k < n; k++)
3339 ddr = VEC_index (ddr_p, ddrs, k);
3340 gcc_assert (DDR_A (ddr) == olddr);
3341 newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3342 nest);
3343 VEC_replace (ddr_p, ddrs, k, newddr);
3344 free_dependence_relation (ddr);
3345 if (!bad
3346 && DR_IS_WRITE (DDR_B (newddr))
3347 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3348 bad = true;
3351 k = VEC_length (ddr_p, ddrs)
3352 - VEC_length (data_reference_p, datarefs) + i;
3353 ddr = VEC_index (ddr_p, ddrs, k);
3354 gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3355 newddr = initialize_data_dependence_relation (dr, dr, nest);
3356 VEC_replace (ddr_p, ddrs, k, newddr);
3357 free_dependence_relation (ddr);
3358 VEC_replace (data_reference_p, datarefs, i, dr);
3360 if (bad)
3362 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3364 fprintf (vect_dump,
3365 "not vectorized: data dependence conflict"
3366 " prevents gather load");
3367 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3369 return false;
3372 STMT_VINFO_GATHER_P (stmt_info) = true;
3374 else if (loop_vinfo
3375 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3377 bool strided_load = false;
3378 if (!nested_in_vect_loop_p (loop, stmt))
3379 strided_load
3380 = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
3381 if (!strided_load)
3383 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3385 fprintf (vect_dump,
3386 "not vectorized: not suitable for strided load ");
3387 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3389 return false;
3391 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
3395 return true;
3399 /* Function vect_get_new_vect_var.
3401 Returns a name for a new variable. The current naming scheme appends the
3402 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
3403 the name of vectorizer generated variables, and appends that to NAME if
3404 provided. */
3406 tree
3407 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3409 const char *prefix;
3410 tree new_vect_var;
3412 switch (var_kind)
3414 case vect_simple_var:
3415 prefix = "vect_";
3416 break;
3417 case vect_scalar_var:
3418 prefix = "stmp_";
3419 break;
3420 case vect_pointer_var:
3421 prefix = "vect_p";
3422 break;
3423 default:
3424 gcc_unreachable ();
3427 if (name)
3429 char* tmp = concat (prefix, name, NULL);
3430 new_vect_var = create_tmp_reg (type, tmp);
3431 free (tmp);
3433 else
3434 new_vect_var = create_tmp_reg (type, prefix);
3436 return new_vect_var;
3440 /* Function vect_create_addr_base_for_vector_ref.
3442 Create an expression that computes the address of the first memory location
3443 that will be accessed for a data reference.
3445 Input:
3446 STMT: The statement containing the data reference.
3447 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3448 OFFSET: Optional. If supplied, it is be added to the initial address.
3449 LOOP: Specify relative to which loop-nest should the address be computed.
3450 For example, when the dataref is in an inner-loop nested in an
3451 outer-loop that is now being vectorized, LOOP can be either the
3452 outer-loop, or the inner-loop. The first memory location accessed
3453 by the following dataref ('in' points to short):
3455 for (i=0; i<N; i++)
3456 for (j=0; j<M; j++)
3457 s += in[i+j]
3459 is as follows:
3460 if LOOP=i_loop: &in (relative to i_loop)
3461 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3463 Output:
3464 1. Return an SSA_NAME whose value is the address of the memory location of
3465 the first vector of the data reference.
3466 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3467 these statement(s) which define the returned SSA_NAME.
3469 FORNOW: We are only handling array accesses with step 1. */
3471 tree
3472 vect_create_addr_base_for_vector_ref (gimple stmt,
3473 gimple_seq *new_stmt_list,
3474 tree offset,
3475 struct loop *loop)
3477 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3478 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3479 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3480 tree base_name;
3481 tree data_ref_base_var;
3482 tree vec_stmt;
3483 tree addr_base, addr_expr;
3484 tree dest;
3485 gimple_seq seq = NULL;
3486 tree base_offset = unshare_expr (DR_OFFSET (dr));
3487 tree init = unshare_expr (DR_INIT (dr));
3488 tree vect_ptr_type;
3489 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3490 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3491 tree base;
3493 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3495 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3497 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3499 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3500 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3501 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3504 if (loop_vinfo)
3505 base_name = build_fold_indirect_ref (data_ref_base);
3506 else
3508 base_offset = ssize_int (0);
3509 init = ssize_int (0);
3510 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
3513 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3514 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3515 data_ref_base_var);
3516 gimple_seq_add_seq (new_stmt_list, seq);
3518 /* Create base_offset */
3519 base_offset = size_binop (PLUS_EXPR,
3520 fold_convert (sizetype, base_offset),
3521 fold_convert (sizetype, init));
3522 dest = create_tmp_var (sizetype, "base_off");
3523 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3524 gimple_seq_add_seq (new_stmt_list, seq);
3526 if (offset)
3528 tree tmp = create_tmp_var (sizetype, "offset");
3530 offset = fold_build2 (MULT_EXPR, sizetype,
3531 fold_convert (sizetype, offset), step);
3532 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3533 base_offset, offset);
3534 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3535 gimple_seq_add_seq (new_stmt_list, seq);
3538 /* base + base_offset */
3539 if (loop_vinfo)
3540 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3541 else
3543 addr_base = build1 (ADDR_EXPR,
3544 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3545 unshare_expr (DR_REF (dr)));
3548 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3549 base = get_base_address (DR_REF (dr));
3550 if (base
3551 && TREE_CODE (base) == MEM_REF)
3552 vect_ptr_type
3553 = build_qualified_type (vect_ptr_type,
3554 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3556 vec_stmt = fold_convert (vect_ptr_type, addr_base);
3557 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3558 get_name (base_name));
3559 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3560 gimple_seq_add_seq (new_stmt_list, seq);
3562 if (DR_PTR_INFO (dr)
3563 && TREE_CODE (vec_stmt) == SSA_NAME)
3565 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3566 if (offset)
3567 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (vec_stmt));
3570 if (vect_print_dump_info (REPORT_DETAILS))
3572 fprintf (vect_dump, "created ");
3573 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
3576 return vec_stmt;
3580 /* Function vect_create_data_ref_ptr.
3582 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3583 location accessed in the loop by STMT, along with the def-use update
3584 chain to appropriately advance the pointer through the loop iterations.
3585 Also set aliasing information for the pointer. This pointer is used by
3586 the callers to this function to create a memory reference expression for
3587 vector load/store access.
3589 Input:
3590 1. STMT: a stmt that references memory. Expected to be of the form
3591 GIMPLE_ASSIGN <name, data-ref> or
3592 GIMPLE_ASSIGN <data-ref, name>.
3593 2. AGGR_TYPE: the type of the reference, which should be either a vector
3594 or an array.
3595 3. AT_LOOP: the loop where the vector memref is to be created.
3596 4. OFFSET (optional): an offset to be added to the initial address accessed
3597 by the data-ref in STMT.
3598 5. BSI: location where the new stmts are to be placed if there is no loop
3599 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3600 pointing to the initial address.
3602 Output:
3603 1. Declare a new ptr to vector_type, and have it point to the base of the
3604 data reference (initial addressed accessed by the data reference).
3605 For example, for vector of type V8HI, the following code is generated:
3607 v8hi *ap;
3608 ap = (v8hi *)initial_address;
3610 if OFFSET is not supplied:
3611 initial_address = &a[init];
3612 if OFFSET is supplied:
3613 initial_address = &a[init + OFFSET];
3615 Return the initial_address in INITIAL_ADDRESS.
3617 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3618 update the pointer in each iteration of the loop.
3620 Return the increment stmt that updates the pointer in PTR_INCR.
3622 3. Set INV_P to true if the access pattern of the data reference in the
3623 vectorized loop is invariant. Set it to false otherwise.
3625 4. Return the pointer. */
3627 tree
3628 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3629 tree offset, tree *initial_address,
3630 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3631 bool only_init, bool *inv_p)
3633 tree base_name;
3634 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3635 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3636 struct loop *loop = NULL;
3637 bool nested_in_vect_loop = false;
3638 struct loop *containing_loop = NULL;
3639 tree aggr_ptr_type;
3640 tree aggr_ptr;
3641 tree new_temp;
3642 gimple vec_stmt;
3643 gimple_seq new_stmt_list = NULL;
3644 edge pe = NULL;
3645 basic_block new_bb;
3646 tree aggr_ptr_init;
3647 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3648 tree aptr;
3649 gimple_stmt_iterator incr_gsi;
3650 bool insert_after;
3651 bool negative;
3652 tree indx_before_incr, indx_after_incr;
3653 gimple incr;
3654 tree step;
3655 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3656 tree base;
3658 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3659 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3661 if (loop_vinfo)
3663 loop = LOOP_VINFO_LOOP (loop_vinfo);
3664 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3665 containing_loop = (gimple_bb (stmt))->loop_father;
3666 pe = loop_preheader_edge (loop);
3668 else
3670 gcc_assert (bb_vinfo);
3671 only_init = true;
3672 *ptr_incr = NULL;
3675 /* Check the step (evolution) of the load in LOOP, and record
3676 whether it's invariant. */
3677 if (nested_in_vect_loop)
3678 step = STMT_VINFO_DR_STEP (stmt_info);
3679 else
3680 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3682 if (tree_int_cst_compare (step, size_zero_node) == 0)
3683 *inv_p = true;
3684 else
3685 *inv_p = false;
3686 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3688 /* Create an expression for the first address accessed by this load
3689 in LOOP. */
3690 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3692 if (vect_print_dump_info (REPORT_DETAILS))
3694 tree data_ref_base = base_name;
3695 fprintf (vect_dump, "create %s-pointer variable to type: ",
3696 tree_code_name[(int) TREE_CODE (aggr_type)]);
3697 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3698 if (TREE_CODE (data_ref_base) == VAR_DECL
3699 || TREE_CODE (data_ref_base) == ARRAY_REF)
3700 fprintf (vect_dump, " vectorizing an array ref: ");
3701 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3702 fprintf (vect_dump, " vectorizing a record based array ref: ");
3703 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3704 fprintf (vect_dump, " vectorizing a pointer ref: ");
3705 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3708 /* (1) Create the new aggregate-pointer variable. */
3709 aggr_ptr_type = build_pointer_type (aggr_type);
3710 base = get_base_address (DR_REF (dr));
3711 if (base
3712 && TREE_CODE (base) == MEM_REF)
3713 aggr_ptr_type
3714 = build_qualified_type (aggr_ptr_type,
3715 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3716 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3717 get_name (base_name));
3719 /* Vector and array types inherit the alias set of their component
3720 type by default so we need to use a ref-all pointer if the data
3721 reference does not conflict with the created aggregated data
3722 reference because it is not addressable. */
3723 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3724 get_alias_set (DR_REF (dr))))
3726 aggr_ptr_type
3727 = build_pointer_type_for_mode (aggr_type,
3728 TYPE_MODE (aggr_ptr_type), true);
3729 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3730 get_name (base_name));
3733 /* Likewise for any of the data references in the stmt group. */
3734 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3736 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3739 tree lhs = gimple_assign_lhs (orig_stmt);
3740 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3741 get_alias_set (lhs)))
3743 aggr_ptr_type
3744 = build_pointer_type_for_mode (aggr_type,
3745 TYPE_MODE (aggr_ptr_type), true);
3746 aggr_ptr
3747 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3748 get_name (base_name));
3749 break;
3752 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3754 while (orig_stmt);
3757 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3758 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3759 def-use update cycles for the pointer: one relative to the outer-loop
3760 (LOOP), which is what steps (3) and (4) below do. The other is relative
3761 to the inner-loop (which is the inner-most loop containing the dataref),
3762 and this is done be step (5) below.
3764 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3765 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3766 redundant. Steps (3),(4) create the following:
3768 vp0 = &base_addr;
3769 LOOP: vp1 = phi(vp0,vp2)
3772 vp2 = vp1 + step
3773 goto LOOP
3775 If there is an inner-loop nested in loop, then step (5) will also be
3776 applied, and an additional update in the inner-loop will be created:
3778 vp0 = &base_addr;
3779 LOOP: vp1 = phi(vp0,vp2)
3781 inner: vp3 = phi(vp1,vp4)
3782 vp4 = vp3 + inner_step
3783 if () goto inner
3785 vp2 = vp1 + step
3786 if () goto LOOP */
3788 /* (2) Calculate the initial address of the aggregate-pointer, and set
3789 the aggregate-pointer to point to it before the loop. */
3791 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3793 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3794 offset, loop);
3795 if (new_stmt_list)
3797 if (pe)
3799 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3800 gcc_assert (!new_bb);
3802 else
3803 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3806 *initial_address = new_temp;
3808 /* Create: p = (aggr_type *) initial_base */
3809 if (TREE_CODE (new_temp) != SSA_NAME
3810 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3812 vec_stmt = gimple_build_assign (aggr_ptr,
3813 fold_convert (aggr_ptr_type, new_temp));
3814 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3815 /* Copy the points-to information if it exists. */
3816 if (DR_PTR_INFO (dr))
3817 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3818 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3819 if (pe)
3821 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3822 gcc_assert (!new_bb);
3824 else
3825 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3827 else
3828 aggr_ptr_init = new_temp;
3830 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3831 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3832 inner-loop nested in LOOP (during outer-loop vectorization). */
3834 /* No update in loop is required. */
3835 if (only_init && (!loop_vinfo || at_loop == loop))
3836 aptr = aggr_ptr_init;
3837 else
3839 /* The step of the aggregate pointer is the type size. */
3840 tree step = TYPE_SIZE_UNIT (aggr_type);
3841 /* One exception to the above is when the scalar step of the load in
3842 LOOP is zero. In this case the step here is also zero. */
3843 if (*inv_p)
3844 step = size_zero_node;
3845 else if (negative)
3846 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3848 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3850 create_iv (aggr_ptr_init,
3851 fold_convert (aggr_ptr_type, step),
3852 aggr_ptr, loop, &incr_gsi, insert_after,
3853 &indx_before_incr, &indx_after_incr);
3854 incr = gsi_stmt (incr_gsi);
3855 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3857 /* Copy the points-to information if it exists. */
3858 if (DR_PTR_INFO (dr))
3860 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3861 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3863 if (ptr_incr)
3864 *ptr_incr = incr;
3866 aptr = indx_before_incr;
3869 if (!nested_in_vect_loop || only_init)
3870 return aptr;
3873 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3874 nested in LOOP, if exists. */
3876 gcc_assert (nested_in_vect_loop);
3877 if (!only_init)
3879 standard_iv_increment_position (containing_loop, &incr_gsi,
3880 &insert_after);
3881 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3882 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3883 &indx_after_incr);
3884 incr = gsi_stmt (incr_gsi);
3885 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3887 /* Copy the points-to information if it exists. */
3888 if (DR_PTR_INFO (dr))
3890 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3891 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3893 if (ptr_incr)
3894 *ptr_incr = incr;
3896 return indx_before_incr;
3898 else
3899 gcc_unreachable ();
3903 /* Function bump_vector_ptr
3905 Increment a pointer (to a vector type) by vector-size. If requested,
3906 i.e. if PTR-INCR is given, then also connect the new increment stmt
3907 to the existing def-use update-chain of the pointer, by modifying
3908 the PTR_INCR as illustrated below:
3910 The pointer def-use update-chain before this function:
3911 DATAREF_PTR = phi (p_0, p_2)
3912 ....
3913 PTR_INCR: p_2 = DATAREF_PTR + step
3915 The pointer def-use update-chain after this function:
3916 DATAREF_PTR = phi (p_0, p_2)
3917 ....
3918 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3919 ....
3920 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3922 Input:
3923 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3924 in the loop.
3925 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3926 the loop. The increment amount across iterations is expected
3927 to be vector_size.
3928 BSI - location where the new update stmt is to be placed.
3929 STMT - the original scalar memory-access stmt that is being vectorized.
3930 BUMP - optional. The offset by which to bump the pointer. If not given,
3931 the offset is assumed to be vector_size.
3933 Output: Return NEW_DATAREF_PTR as illustrated above.
3937 tree
3938 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3939 gimple stmt, tree bump)
3941 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3942 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3943 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3944 tree update = TYPE_SIZE_UNIT (vectype);
3945 gimple incr_stmt;
3946 ssa_op_iter iter;
3947 use_operand_p use_p;
3948 tree new_dataref_ptr;
3950 if (bump)
3951 update = bump;
3953 new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
3954 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
3955 dataref_ptr, update);
3956 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3958 /* Copy the points-to information if it exists. */
3959 if (DR_PTR_INFO (dr))
3961 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3962 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
3965 if (!ptr_incr)
3966 return new_dataref_ptr;
3968 /* Update the vector-pointer's cross-iteration increment. */
3969 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3971 tree use = USE_FROM_PTR (use_p);
3973 if (use == dataref_ptr)
3974 SET_USE (use_p, new_dataref_ptr);
3975 else
3976 gcc_assert (tree_int_cst_compare (use, update) == 0);
3979 return new_dataref_ptr;
3983 /* Function vect_create_destination_var.
3985 Create a new temporary of type VECTYPE. */
3987 tree
3988 vect_create_destination_var (tree scalar_dest, tree vectype)
3990 tree vec_dest;
3991 const char *new_name;
3992 tree type;
3993 enum vect_var_kind kind;
3995 kind = vectype ? vect_simple_var : vect_scalar_var;
3996 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3998 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4000 new_name = get_name (scalar_dest);
4001 if (!new_name)
4002 new_name = "var_";
4003 vec_dest = vect_get_new_vect_var (type, kind, new_name);
4005 return vec_dest;
4008 /* Function vect_grouped_store_supported.
4010 Returns TRUE if interleave high and interleave low permutations
4011 are supported, and FALSE otherwise. */
4013 bool
4014 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
4016 enum machine_mode mode = TYPE_MODE (vectype);
4018 /* vect_permute_store_chain requires the group size to be a power of two. */
4019 if (exact_log2 (count) == -1)
4021 if (vect_print_dump_info (REPORT_DETAILS))
4022 fprintf (vect_dump, "the size of the group of accesses"
4023 " is not a power of 2");
4024 return false;
4027 /* Check that the permutation is supported. */
4028 if (VECTOR_MODE_P (mode))
4030 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4031 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4032 for (i = 0; i < nelt / 2; i++)
4034 sel[i * 2] = i;
4035 sel[i * 2 + 1] = i + nelt;
4037 if (can_vec_perm_p (mode, false, sel))
4039 for (i = 0; i < nelt; i++)
4040 sel[i] += nelt / 2;
4041 if (can_vec_perm_p (mode, false, sel))
4042 return true;
4046 if (vect_print_dump_info (REPORT_DETAILS))
4047 fprintf (vect_dump, "interleave op not supported by target.");
4048 return false;
4052 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
4053 type VECTYPE. */
4055 bool
4056 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4058 return vect_lanes_optab_supported_p ("vec_store_lanes",
4059 vec_store_lanes_optab,
4060 vectype, count);
4064 /* Function vect_permute_store_chain.
4066 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4067 a power of 2, generate interleave_high/low stmts to reorder the data
4068 correctly for the stores. Return the final references for stores in
4069 RESULT_CHAIN.
4071 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4072 The input is 4 vectors each containing 8 elements. We assign a number to
4073 each element, the input sequence is:
4075 1st vec: 0 1 2 3 4 5 6 7
4076 2nd vec: 8 9 10 11 12 13 14 15
4077 3rd vec: 16 17 18 19 20 21 22 23
4078 4th vec: 24 25 26 27 28 29 30 31
4080 The output sequence should be:
4082 1st vec: 0 8 16 24 1 9 17 25
4083 2nd vec: 2 10 18 26 3 11 19 27
4084 3rd vec: 4 12 20 28 5 13 21 30
4085 4th vec: 6 14 22 30 7 15 23 31
4087 i.e., we interleave the contents of the four vectors in their order.
4089 We use interleave_high/low instructions to create such output. The input of
4090 each interleave_high/low operation is two vectors:
4091 1st vec 2nd vec
4092 0 1 2 3 4 5 6 7
4093 the even elements of the result vector are obtained left-to-right from the
4094 high/low elements of the first vector. The odd elements of the result are
4095 obtained left-to-right from the high/low elements of the second vector.
4096 The output of interleave_high will be: 0 4 1 5
4097 and of interleave_low: 2 6 3 7
4100 The permutation is done in log LENGTH stages. In each stage interleave_high
4101 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4102 where the first argument is taken from the first half of DR_CHAIN and the
4103 second argument from it's second half.
4104 In our example,
4106 I1: interleave_high (1st vec, 3rd vec)
4107 I2: interleave_low (1st vec, 3rd vec)
4108 I3: interleave_high (2nd vec, 4th vec)
4109 I4: interleave_low (2nd vec, 4th vec)
4111 The output for the first stage is:
4113 I1: 0 16 1 17 2 18 3 19
4114 I2: 4 20 5 21 6 22 7 23
4115 I3: 8 24 9 25 10 26 11 27
4116 I4: 12 28 13 29 14 30 15 31
4118 The output of the second stage, i.e. the final result is:
4120 I1: 0 8 16 24 1 9 17 25
4121 I2: 2 10 18 26 3 11 19 27
4122 I3: 4 12 20 28 5 13 21 30
4123 I4: 6 14 22 30 7 15 23 31. */
4125 void
4126 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
4127 unsigned int length,
4128 gimple stmt,
4129 gimple_stmt_iterator *gsi,
4130 VEC(tree,heap) **result_chain)
4132 tree vect1, vect2, high, low;
4133 gimple perm_stmt;
4134 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4135 tree perm_mask_low, perm_mask_high;
4136 unsigned int i, n;
4137 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
4138 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4140 *result_chain = VEC_copy (tree, heap, dr_chain);
4142 for (i = 0, n = nelt / 2; i < n; i++)
4144 sel[i * 2] = i;
4145 sel[i * 2 + 1] = i + nelt;
4147 perm_mask_high = vect_gen_perm_mask (vectype, sel);
4148 gcc_assert (perm_mask_high != NULL);
4150 for (i = 0; i < nelt; i++)
4151 sel[i] += nelt / 2;
4152 perm_mask_low = vect_gen_perm_mask (vectype, sel);
4153 gcc_assert (perm_mask_low != NULL);
4155 for (i = 0, n = exact_log2 (length); i < n; i++)
4157 for (j = 0; j < length/2; j++)
4159 vect1 = VEC_index (tree, dr_chain, j);
4160 vect2 = VEC_index (tree, dr_chain, j+length/2);
4162 /* Create interleaving stmt:
4163 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
4164 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
4165 perm_stmt
4166 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high,
4167 vect1, vect2, perm_mask_high);
4168 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4169 VEC_replace (tree, *result_chain, 2*j, high);
4171 /* Create interleaving stmt:
4172 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4173 nelt*3/2+1, ...}> */
4174 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
4175 perm_stmt
4176 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low,
4177 vect1, vect2, perm_mask_low);
4178 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4179 VEC_replace (tree, *result_chain, 2*j+1, low);
4181 dr_chain = VEC_copy (tree, heap, *result_chain);
4185 /* Function vect_setup_realignment
4187 This function is called when vectorizing an unaligned load using
4188 the dr_explicit_realign[_optimized] scheme.
4189 This function generates the following code at the loop prolog:
4191 p = initial_addr;
4192 x msq_init = *(floor(p)); # prolog load
4193 realignment_token = call target_builtin;
4194 loop:
4195 x msq = phi (msq_init, ---)
4197 The stmts marked with x are generated only for the case of
4198 dr_explicit_realign_optimized.
4200 The code above sets up a new (vector) pointer, pointing to the first
4201 location accessed by STMT, and a "floor-aligned" load using that pointer.
4202 It also generates code to compute the "realignment-token" (if the relevant
4203 target hook was defined), and creates a phi-node at the loop-header bb
4204 whose arguments are the result of the prolog-load (created by this
4205 function) and the result of a load that takes place in the loop (to be
4206 created by the caller to this function).
4208 For the case of dr_explicit_realign_optimized:
4209 The caller to this function uses the phi-result (msq) to create the
4210 realignment code inside the loop, and sets up the missing phi argument,
4211 as follows:
4212 loop:
4213 msq = phi (msq_init, lsq)
4214 lsq = *(floor(p')); # load in loop
4215 result = realign_load (msq, lsq, realignment_token);
4217 For the case of dr_explicit_realign:
4218 loop:
4219 msq = *(floor(p)); # load in loop
4220 p' = p + (VS-1);
4221 lsq = *(floor(p')); # load in loop
4222 result = realign_load (msq, lsq, realignment_token);
4224 Input:
4225 STMT - (scalar) load stmt to be vectorized. This load accesses
4226 a memory location that may be unaligned.
4227 BSI - place where new code is to be inserted.
4228 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4229 is used.
4231 Output:
4232 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4233 target hook, if defined.
4234 Return value - the result of the loop-header phi node. */
4236 tree
4237 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4238 tree *realignment_token,
4239 enum dr_alignment_support alignment_support_scheme,
4240 tree init_addr,
4241 struct loop **at_loop)
4243 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4244 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4245 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4246 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4247 struct loop *loop = NULL;
4248 edge pe = NULL;
4249 tree scalar_dest = gimple_assign_lhs (stmt);
4250 tree vec_dest;
4251 gimple inc;
4252 tree ptr;
4253 tree data_ref;
4254 gimple new_stmt;
4255 basic_block new_bb;
4256 tree msq_init = NULL_TREE;
4257 tree new_temp;
4258 gimple phi_stmt;
4259 tree msq = NULL_TREE;
4260 gimple_seq stmts = NULL;
4261 bool inv_p;
4262 bool compute_in_loop = false;
4263 bool nested_in_vect_loop = false;
4264 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4265 struct loop *loop_for_initial_load = NULL;
4267 if (loop_vinfo)
4269 loop = LOOP_VINFO_LOOP (loop_vinfo);
4270 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4273 gcc_assert (alignment_support_scheme == dr_explicit_realign
4274 || alignment_support_scheme == dr_explicit_realign_optimized);
4276 /* We need to generate three things:
4277 1. the misalignment computation
4278 2. the extra vector load (for the optimized realignment scheme).
4279 3. the phi node for the two vectors from which the realignment is
4280 done (for the optimized realignment scheme). */
4282 /* 1. Determine where to generate the misalignment computation.
4284 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4285 calculation will be generated by this function, outside the loop (in the
4286 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4287 caller, inside the loop.
4289 Background: If the misalignment remains fixed throughout the iterations of
4290 the loop, then both realignment schemes are applicable, and also the
4291 misalignment computation can be done outside LOOP. This is because we are
4292 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4293 are a multiple of VS (the Vector Size), and therefore the misalignment in
4294 different vectorized LOOP iterations is always the same.
4295 The problem arises only if the memory access is in an inner-loop nested
4296 inside LOOP, which is now being vectorized using outer-loop vectorization.
4297 This is the only case when the misalignment of the memory access may not
4298 remain fixed throughout the iterations of the inner-loop (as explained in
4299 detail in vect_supportable_dr_alignment). In this case, not only is the
4300 optimized realignment scheme not applicable, but also the misalignment
4301 computation (and generation of the realignment token that is passed to
4302 REALIGN_LOAD) have to be done inside the loop.
4304 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4305 or not, which in turn determines if the misalignment is computed inside
4306 the inner-loop, or outside LOOP. */
4308 if (init_addr != NULL_TREE || !loop_vinfo)
4310 compute_in_loop = true;
4311 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4315 /* 2. Determine where to generate the extra vector load.
4317 For the optimized realignment scheme, instead of generating two vector
4318 loads in each iteration, we generate a single extra vector load in the
4319 preheader of the loop, and in each iteration reuse the result of the
4320 vector load from the previous iteration. In case the memory access is in
4321 an inner-loop nested inside LOOP, which is now being vectorized using
4322 outer-loop vectorization, we need to determine whether this initial vector
4323 load should be generated at the preheader of the inner-loop, or can be
4324 generated at the preheader of LOOP. If the memory access has no evolution
4325 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4326 to be generated inside LOOP (in the preheader of the inner-loop). */
4328 if (nested_in_vect_loop)
4330 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4331 bool invariant_in_outerloop =
4332 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4333 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4335 else
4336 loop_for_initial_load = loop;
4337 if (at_loop)
4338 *at_loop = loop_for_initial_load;
4340 if (loop_for_initial_load)
4341 pe = loop_preheader_edge (loop_for_initial_load);
4343 /* 3. For the case of the optimized realignment, create the first vector
4344 load at the loop preheader. */
4346 if (alignment_support_scheme == dr_explicit_realign_optimized)
4348 /* Create msq_init = *(floor(p1)) in the loop preheader */
4350 gcc_assert (!compute_in_loop);
4351 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4352 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4353 NULL_TREE, &init_addr, NULL, &inc,
4354 true, &inv_p);
4355 new_temp = copy_ssa_name (ptr, NULL);
4356 new_stmt = gimple_build_assign_with_ops
4357 (BIT_AND_EXPR, new_temp, ptr,
4358 build_int_cst (TREE_TYPE (ptr),
4359 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4360 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4361 gcc_assert (!new_bb);
4362 data_ref
4363 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4364 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4365 new_stmt = gimple_build_assign (vec_dest, data_ref);
4366 new_temp = make_ssa_name (vec_dest, new_stmt);
4367 gimple_assign_set_lhs (new_stmt, new_temp);
4368 if (pe)
4370 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4371 gcc_assert (!new_bb);
4373 else
4374 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4376 msq_init = gimple_assign_lhs (new_stmt);
4379 /* 4. Create realignment token using a target builtin, if available.
4380 It is done either inside the containing loop, or before LOOP (as
4381 determined above). */
4383 if (targetm.vectorize.builtin_mask_for_load)
4385 tree builtin_decl;
4387 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
4388 if (!init_addr)
4390 /* Generate the INIT_ADDR computation outside LOOP. */
4391 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4392 NULL_TREE, loop);
4393 if (loop)
4395 pe = loop_preheader_edge (loop);
4396 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4397 gcc_assert (!new_bb);
4399 else
4400 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4403 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4404 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4405 vec_dest =
4406 vect_create_destination_var (scalar_dest,
4407 gimple_call_return_type (new_stmt));
4408 new_temp = make_ssa_name (vec_dest, new_stmt);
4409 gimple_call_set_lhs (new_stmt, new_temp);
4411 if (compute_in_loop)
4412 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4413 else
4415 /* Generate the misalignment computation outside LOOP. */
4416 pe = loop_preheader_edge (loop);
4417 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4418 gcc_assert (!new_bb);
4421 *realignment_token = gimple_call_lhs (new_stmt);
4423 /* The result of the CALL_EXPR to this builtin is determined from
4424 the value of the parameter and no global variables are touched
4425 which makes the builtin a "const" function. Requiring the
4426 builtin to have the "const" attribute makes it unnecessary
4427 to call mark_call_clobbered. */
4428 gcc_assert (TREE_READONLY (builtin_decl));
4431 if (alignment_support_scheme == dr_explicit_realign)
4432 return msq;
4434 gcc_assert (!compute_in_loop);
4435 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4438 /* 5. Create msq = phi <msq_init, lsq> in loop */
4440 pe = loop_preheader_edge (containing_loop);
4441 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4442 msq = make_ssa_name (vec_dest, NULL);
4443 phi_stmt = create_phi_node (msq, containing_loop->header);
4444 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4446 return msq;
4450 /* Function vect_grouped_load_supported.
4452 Returns TRUE if even and odd permutations are supported,
4453 and FALSE otherwise. */
4455 bool
4456 vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4458 enum machine_mode mode = TYPE_MODE (vectype);
4460 /* vect_permute_load_chain requires the group size to be a power of two. */
4461 if (exact_log2 (count) == -1)
4463 if (vect_print_dump_info (REPORT_DETAILS))
4464 fprintf (vect_dump, "the size of the group of accesses"
4465 " is not a power of 2");
4466 return false;
4469 /* Check that the permutation is supported. */
4470 if (VECTOR_MODE_P (mode))
4472 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4473 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4475 for (i = 0; i < nelt; i++)
4476 sel[i] = i * 2;
4477 if (can_vec_perm_p (mode, false, sel))
4479 for (i = 0; i < nelt; i++)
4480 sel[i] = i * 2 + 1;
4481 if (can_vec_perm_p (mode, false, sel))
4482 return true;
4486 if (vect_print_dump_info (REPORT_DETAILS))
4487 fprintf (vect_dump, "extract even/odd not supported by target");
4488 return false;
4491 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4492 type VECTYPE. */
4494 bool
4495 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4497 return vect_lanes_optab_supported_p ("vec_load_lanes",
4498 vec_load_lanes_optab,
4499 vectype, count);
4502 /* Function vect_permute_load_chain.
4504 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4505 a power of 2, generate extract_even/odd stmts to reorder the input data
4506 correctly. Return the final references for loads in RESULT_CHAIN.
4508 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4509 The input is 4 vectors each containing 8 elements. We assign a number to each
4510 element, the input sequence is:
4512 1st vec: 0 1 2 3 4 5 6 7
4513 2nd vec: 8 9 10 11 12 13 14 15
4514 3rd vec: 16 17 18 19 20 21 22 23
4515 4th vec: 24 25 26 27 28 29 30 31
4517 The output sequence should be:
4519 1st vec: 0 4 8 12 16 20 24 28
4520 2nd vec: 1 5 9 13 17 21 25 29
4521 3rd vec: 2 6 10 14 18 22 26 30
4522 4th vec: 3 7 11 15 19 23 27 31
4524 i.e., the first output vector should contain the first elements of each
4525 interleaving group, etc.
4527 We use extract_even/odd instructions to create such output. The input of
4528 each extract_even/odd operation is two vectors
4529 1st vec 2nd vec
4530 0 1 2 3 4 5 6 7
4532 and the output is the vector of extracted even/odd elements. The output of
4533 extract_even will be: 0 2 4 6
4534 and of extract_odd: 1 3 5 7
4537 The permutation is done in log LENGTH stages. In each stage extract_even
4538 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4539 their order. In our example,
4541 E1: extract_even (1st vec, 2nd vec)
4542 E2: extract_odd (1st vec, 2nd vec)
4543 E3: extract_even (3rd vec, 4th vec)
4544 E4: extract_odd (3rd vec, 4th vec)
4546 The output for the first stage will be:
4548 E1: 0 2 4 6 8 10 12 14
4549 E2: 1 3 5 7 9 11 13 15
4550 E3: 16 18 20 22 24 26 28 30
4551 E4: 17 19 21 23 25 27 29 31
4553 In order to proceed and create the correct sequence for the next stage (or
4554 for the correct output, if the second stage is the last one, as in our
4555 example), we first put the output of extract_even operation and then the
4556 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4557 The input for the second stage is:
4559 1st vec (E1): 0 2 4 6 8 10 12 14
4560 2nd vec (E3): 16 18 20 22 24 26 28 30
4561 3rd vec (E2): 1 3 5 7 9 11 13 15
4562 4th vec (E4): 17 19 21 23 25 27 29 31
4564 The output of the second stage:
4566 E1: 0 4 8 12 16 20 24 28
4567 E2: 2 6 10 14 18 22 26 30
4568 E3: 1 5 9 13 17 21 25 29
4569 E4: 3 7 11 15 19 23 27 31
4571 And RESULT_CHAIN after reordering:
4573 1st vec (E1): 0 4 8 12 16 20 24 28
4574 2nd vec (E3): 1 5 9 13 17 21 25 29
4575 3rd vec (E2): 2 6 10 14 18 22 26 30
4576 4th vec (E4): 3 7 11 15 19 23 27 31. */
4578 static void
4579 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4580 unsigned int length,
4581 gimple stmt,
4582 gimple_stmt_iterator *gsi,
4583 VEC(tree,heap) **result_chain)
4585 tree data_ref, first_vect, second_vect;
4586 tree perm_mask_even, perm_mask_odd;
4587 gimple perm_stmt;
4588 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4589 unsigned int i, j, log_length = exact_log2 (length);
4590 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4591 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4593 *result_chain = VEC_copy (tree, heap, dr_chain);
4595 for (i = 0; i < nelt; ++i)
4596 sel[i] = i * 2;
4597 perm_mask_even = vect_gen_perm_mask (vectype, sel);
4598 gcc_assert (perm_mask_even != NULL);
4600 for (i = 0; i < nelt; ++i)
4601 sel[i] = i * 2 + 1;
4602 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4603 gcc_assert (perm_mask_odd != NULL);
4605 for (i = 0; i < log_length; i++)
4607 for (j = 0; j < length; j += 2)
4609 first_vect = VEC_index (tree, dr_chain, j);
4610 second_vect = VEC_index (tree, dr_chain, j+1);
4612 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4613 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
4614 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4615 first_vect, second_vect,
4616 perm_mask_even);
4617 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4618 VEC_replace (tree, *result_chain, j/2, data_ref);
4620 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4621 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
4622 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4623 first_vect, second_vect,
4624 perm_mask_odd);
4625 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4626 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4628 dr_chain = VEC_copy (tree, heap, *result_chain);
4633 /* Function vect_transform_grouped_load.
4635 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4636 to perform their permutation and ascribe the result vectorized statements to
4637 the scalar statements.
4640 void
4641 vect_transform_grouped_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4642 gimple_stmt_iterator *gsi)
4644 VEC(tree,heap) *result_chain = NULL;
4646 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4647 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4648 vectors, that are ready for vector computation. */
4649 result_chain = VEC_alloc (tree, heap, size);
4650 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4651 vect_record_grouped_load_vectors (stmt, result_chain);
4652 VEC_free (tree, heap, result_chain);
4655 /* RESULT_CHAIN contains the output of a group of grouped loads that were
4656 generated as part of the vectorization of STMT. Assign the statement
4657 for each vector to the associated scalar statement. */
4659 void
4660 vect_record_grouped_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4662 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4663 gimple next_stmt, new_stmt;
4664 unsigned int i, gap_count;
4665 tree tmp_data_ref;
4667 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4668 Since we scan the chain starting from it's first node, their order
4669 corresponds the order of data-refs in RESULT_CHAIN. */
4670 next_stmt = first_stmt;
4671 gap_count = 1;
4672 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4674 if (!next_stmt)
4675 break;
4677 /* Skip the gaps. Loads created for the gaps will be removed by dead
4678 code elimination pass later. No need to check for the first stmt in
4679 the group, since it always exists.
4680 GROUP_GAP is the number of steps in elements from the previous
4681 access (if there is no gap GROUP_GAP is 1). We skip loads that
4682 correspond to the gaps. */
4683 if (next_stmt != first_stmt
4684 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4686 gap_count++;
4687 continue;
4690 while (next_stmt)
4692 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4693 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4694 copies, and we put the new vector statement in the first available
4695 RELATED_STMT. */
4696 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4697 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4698 else
4700 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4702 gimple prev_stmt =
4703 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4704 gimple rel_stmt =
4705 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4706 while (rel_stmt)
4708 prev_stmt = rel_stmt;
4709 rel_stmt =
4710 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4713 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4714 new_stmt;
4718 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4719 gap_count = 1;
4720 /* If NEXT_STMT accesses the same DR as the previous statement,
4721 put the same TMP_DATA_REF as its vectorized statement; otherwise
4722 get the next data-ref from RESULT_CHAIN. */
4723 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4724 break;
4729 /* Function vect_force_dr_alignment_p.
4731 Returns whether the alignment of a DECL can be forced to be aligned
4732 on ALIGNMENT bit boundary. */
4734 bool
4735 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4737 if (TREE_CODE (decl) != VAR_DECL)
4738 return false;
4740 /* We cannot change alignment of common or external symbols as another
4741 translation unit may contain a definition with lower alignment.
4742 The rules of common symbol linking mean that the definition
4743 will override the common symbol. */
4744 if (DECL_EXTERNAL (decl)
4745 || DECL_COMMON (decl))
4746 return false;
4748 if (TREE_ASM_WRITTEN (decl))
4749 return false;
4751 /* Do not override the alignment as specified by the ABI when the used
4752 attribute is set. */
4753 if (DECL_PRESERVE_P (decl))
4754 return false;
4756 if (TREE_STATIC (decl))
4757 return (alignment <= MAX_OFILE_ALIGNMENT);
4758 else
4759 return (alignment <= MAX_STACK_ALIGNMENT);
4763 /* Return whether the data reference DR is supported with respect to its
4764 alignment.
4765 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4766 it is aligned, i.e., check if it is possible to vectorize it with different
4767 alignment. */
4769 enum dr_alignment_support
4770 vect_supportable_dr_alignment (struct data_reference *dr,
4771 bool check_aligned_accesses)
4773 gimple stmt = DR_STMT (dr);
4774 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4775 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4776 enum machine_mode mode = TYPE_MODE (vectype);
4777 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4778 struct loop *vect_loop = NULL;
4779 bool nested_in_vect_loop = false;
4781 if (aligned_access_p (dr) && !check_aligned_accesses)
4782 return dr_aligned;
4784 if (loop_vinfo)
4786 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4787 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4790 /* Possibly unaligned access. */
4792 /* We can choose between using the implicit realignment scheme (generating
4793 a misaligned_move stmt) and the explicit realignment scheme (generating
4794 aligned loads with a REALIGN_LOAD). There are two variants to the
4795 explicit realignment scheme: optimized, and unoptimized.
4796 We can optimize the realignment only if the step between consecutive
4797 vector loads is equal to the vector size. Since the vector memory
4798 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4799 is guaranteed that the misalignment amount remains the same throughout the
4800 execution of the vectorized loop. Therefore, we can create the
4801 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4802 at the loop preheader.
4804 However, in the case of outer-loop vectorization, when vectorizing a
4805 memory access in the inner-loop nested within the LOOP that is now being
4806 vectorized, while it is guaranteed that the misalignment of the
4807 vectorized memory access will remain the same in different outer-loop
4808 iterations, it is *not* guaranteed that is will remain the same throughout
4809 the execution of the inner-loop. This is because the inner-loop advances
4810 with the original scalar step (and not in steps of VS). If the inner-loop
4811 step happens to be a multiple of VS, then the misalignment remains fixed
4812 and we can use the optimized realignment scheme. For example:
4814 for (i=0; i<N; i++)
4815 for (j=0; j<M; j++)
4816 s += a[i+j];
4818 When vectorizing the i-loop in the above example, the step between
4819 consecutive vector loads is 1, and so the misalignment does not remain
4820 fixed across the execution of the inner-loop, and the realignment cannot
4821 be optimized (as illustrated in the following pseudo vectorized loop):
4823 for (i=0; i<N; i+=4)
4824 for (j=0; j<M; j++){
4825 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4826 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4827 // (assuming that we start from an aligned address).
4830 We therefore have to use the unoptimized realignment scheme:
4832 for (i=0; i<N; i+=4)
4833 for (j=k; j<M; j+=4)
4834 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4835 // that the misalignment of the initial address is
4836 // 0).
4838 The loop can then be vectorized as follows:
4840 for (k=0; k<4; k++){
4841 rt = get_realignment_token (&vp[k]);
4842 for (i=0; i<N; i+=4){
4843 v1 = vp[i+k];
4844 for (j=k; j<M; j+=4){
4845 v2 = vp[i+j+VS-1];
4846 va = REALIGN_LOAD <v1,v2,rt>;
4847 vs += va;
4848 v1 = v2;
4851 } */
4853 if (DR_IS_READ (dr))
4855 bool is_packed = false;
4856 tree type = (TREE_TYPE (DR_REF (dr)));
4858 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4859 && (!targetm.vectorize.builtin_mask_for_load
4860 || targetm.vectorize.builtin_mask_for_load ()))
4862 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4863 if ((nested_in_vect_loop
4864 && (TREE_INT_CST_LOW (DR_STEP (dr))
4865 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4866 || !loop_vinfo)
4867 return dr_explicit_realign;
4868 else
4869 return dr_explicit_realign_optimized;
4871 if (!known_alignment_for_access_p (dr))
4872 is_packed = not_size_aligned (DR_REF (dr));
4874 if (targetm.vectorize.
4875 support_vector_misalignment (mode, type,
4876 DR_MISALIGNMENT (dr), is_packed))
4877 /* Can't software pipeline the loads, but can at least do them. */
4878 return dr_unaligned_supported;
4880 else
4882 bool is_packed = false;
4883 tree type = (TREE_TYPE (DR_REF (dr)));
4885 if (!known_alignment_for_access_p (dr))
4886 is_packed = not_size_aligned (DR_REF (dr));
4888 if (targetm.vectorize.
4889 support_vector_misalignment (mode, type,
4890 DR_MISALIGNMENT (dr), is_packed))
4891 return dr_unaligned_supported;
4894 /* Unsupported. */
4895 return dr_unaligned_unsupported;