1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "dumpfile.h"
35 #include "cfgloop.h"
36 #include "tree-chrec.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-vectorizer.h"
39 #include "diagnostic-core.h"
41 /* Need to include rtl.h, expr.h, etc. for optabs. */
42 #include "expr.h"
43 #include "optabs.h"
45 /* Return true if load- or store-lanes optab OPTAB is implemented for
46 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
48 static bool
49 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
50 tree vectype, unsigned HOST_WIDE_INT count)
52 enum machine_mode mode, array_mode;
53 bool limit_p;
55 mode = TYPE_MODE (vectype);
56 limit_p = !targetm.array_mode_supported_p (mode, count);
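  /* The array of COUNT vectors is represented by an integer mode of the
     combined size; the MAX_FIXED_MODE_SIZE limit is applied only when the
     target does not explicitly support such an array mode.  */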
57 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
58 MODE_INT, limit_p);
60 if (array_mode == BLKmode)
62 if (vect_print_dump_info (REPORT_DETAILS))
63 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
64 GET_MODE_NAME (mode), count);
65 return false;
68 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
70 if (vect_print_dump_info (REPORT_DETAILS))
71 fprintf (vect_dump, "cannot use %s<%s><%s>",
72 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
73 return false;
76 if (vect_print_dump_info (REPORT_DETAILS))
77 fprintf (vect_dump, "can use %s<%s><%s>",
78 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
80 return true;
84 /* Return the smallest scalar part of STMT.
85 This is used to determine the vectype of the stmt. We generally set the
86 vectype according to the type of the result (lhs). For stmts whose
87 result-type is different than the type of the arguments (e.g., demotion,
88 promotion), vectype will be reset appropriately (later). Note that we have
89 to visit the smallest datatype in this function, because that determines the
90 VF. If the smallest datatype in the loop is present only as the rhs of a
91 promotion operation - we'd miss it.
92 Such a case, where a variable of this datatype does not appear in the lhs
93 anywhere in the loop, can only occur if it's an invariant: e.g.:
94 'int_x = (int) short_inv', which we'd expect to have been optimized away by
95 invariant motion. However, we cannot rely on invariant motion to always
96 take invariants out of the loop, and so in the case of promotion we also
97 have to check the rhs.
98 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
99 types. */
101 tree
102 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
103 HOST_WIDE_INT *rhs_size_unit)
105 tree scalar_type = gimple_expr_type (stmt);
106 HOST_WIDE_INT lhs, rhs;
108 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
110 if (is_gimple_assign (stmt)
111 && (gimple_assign_cast_p (stmt)
112 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
113 || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
114 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
116 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
118 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
119 if (rhs < lhs)
120 scalar_type = rhs_type;
123 *lhs_size_unit = lhs;
124 *rhs_size_unit = rhs;
125 return scalar_type;
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
 132 int
 133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
135 gimple next_stmt = first_stmt;
136 int result = 0;
138 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
139 return -1;
141 while (next_stmt && next_stmt != stmt)
143 result++;
144 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
147 if (next_stmt)
148 return result;
149 else
150 return -1;
154 /* Function vect_insert_into_interleaving_chain.
156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
158 static void
159 vect_insert_into_interleaving_chain (struct data_reference *dra,
160 struct data_reference *drb)
162 gimple prev, next;
163 tree next_init;
164 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
167 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
169 while (next)
171 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
174 /* Insert here. */
175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
177 return;
179 prev = next;
180 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
183 /* We got to the end of the list. Insert here. */
184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
189 /* Function vect_update_interleaving_chain.
 191    For two data-refs DRA and DRB that are part of a chain of interleaved data
192 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
194 There are four possible cases:
195 1. New stmts - both DRA and DRB are not a part of any chain:
196 FIRST_DR = DRB
197 NEXT_DR (DRB) = DRA
198 2. DRB is a part of a chain and DRA is not:
199 no need to update FIRST_DR
200 no need to insert DRB
201 insert DRA according to init
202 3. DRA is a part of a chain and DRB is not:
203 if (init of FIRST_DR > init of DRB)
204 FIRST_DR = DRB
205 NEXT(FIRST_DR) = previous FIRST_DR
206 else
207 insert DRB according to its init
208 4. both DRA and DRB are in some interleaving chains:
209 choose the chain with the smallest init of FIRST_DR
210 insert the nodes of the second chain into the first one. */
212 static void
213 vect_update_interleaving_chain (struct data_reference *drb,
214 struct data_reference *dra)
216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218 tree next_init, init_dra_chain, init_drb_chain;
219 gimple first_a, first_b;
220 tree node_init;
221 gimple node, prev, next, first_stmt;
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
229 return;
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra, drb);
238 return;
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
246 old_first_stmt)));
247 gimple tmp;
249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp = old_first_stmt;
259 while (tmp)
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
265 else
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb, dra);
269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
271 return;
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277 if (first_a == first_b)
278 return;
279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
284 /* Insert the nodes of DRA chain into the DRB chain.
285 After inserting a node, continue from this node of the DRB chain (don't
 286          start from the beginning).  */
287 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289 first_stmt = first_b;
291 else
293 /* Insert the nodes of DRB chain into the DRA chain.
294 After inserting a node, continue from this node of the DRA chain (don't
 295          start from the beginning).  */
296 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298 first_stmt = first_a;
301 while (node)
303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
305 while (next)
307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 if (tree_int_cst_compare (next_init, node_init) > 0)
310 /* Insert here. */
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
313 prev = node;
314 break;
316 prev = next;
317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
319 if (!next)
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
324 prev = node;
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
331 /* Check dependence between DRA and DRB for basic block vectorization.
 332    If the accesses share the same base and offsets, we can compare their initial
333 constant offsets to decide whether they differ or not. In case of a read-
334 write dependence we check that the load is before the store to ensure that
335 vectorization will not change the order of the accesses. */
337 static bool
338 vect_drs_dependent_in_basic_block (struct data_reference *dra,
339 struct data_reference *drb)
341 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
342 gimple earlier_stmt;
344 /* We only call this function for pairs of loads and stores, but we verify
345 it here. */
346 if (DR_IS_READ (dra) == DR_IS_READ (drb))
348 if (DR_IS_READ (dra))
349 return false;
350 else
351 return true;
354 /* Check that the data-refs have same bases and offsets. If not, we can't
355 determine if they are dependent. */
356 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357 || !dr_equal_offsets_p (dra, drb))
358 return true;
360 /* Check the types. */
361 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
364 if (type_size_a != type_size_b
365 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366 TREE_TYPE (DR_REF (drb))))
367 return true;
369 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
372 /* Two different locations - no dependence. */
373 if (init_a != init_b)
374 return false;
376 /* We have a read-write dependence. Check that the load is before the store.
 377      When we vectorize basic blocks, a vector load can only appear before its
 378      corresponding scalar load, and a vector store only after its
 379      corresponding scalar store.  So the order of the accesses is preserved in
380 case the load is before the store. */
381 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
383 return false;
385 return true;
389 /* Function vect_check_interleaving.
391 Check if DRA and DRB are a part of interleaving. In case they are, insert
392 DRA and DRB in an interleaving chain. */
394 static bool
395 vect_check_interleaving (struct data_reference *dra,
396 struct data_reference *drb)
398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
400 /* Check that the data-refs have same first location (except init) and they
401 are both either store or load (not load and store). */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403 || !dr_equal_offsets_p (dra, drb)
404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405 || DR_IS_READ (dra) != DR_IS_READ (drb))
406 return false;
408 /* Check:
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between
412 data-refs' inits. */
413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
416 if (type_size_a != type_size_b
417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419 TREE_TYPE (DR_REF (drb))))
420 return false;
422 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424 step = TREE_INT_CST_LOW (DR_STEP (dra));
426 if (init_a > init_b)
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size = (init_a - init_b) % type_size_a;
432 if (step && (init_a - init_b) > step)
433 return false;
435 if (diff_mod_size == 0)
437 vect_update_interleaving_chain (drb, dra);
438 if (vect_print_dump_info (REPORT_DR_DETAILS))
440 fprintf (vect_dump, "Detected interleaving ");
441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 fprintf (vect_dump, " and ");
443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
445 return true;
448 else
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size = (init_b - init_a) % type_size_a;
454 if (step && (init_b - init_a) > step)
455 return false;
457 if (diff_mod_size == 0)
459 vect_update_interleaving_chain (dra, drb);
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 fprintf (vect_dump, "Detected interleaving ");
463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
467 return true;
471 return false;
474 /* Check if data references pointed by DR_I and DR_J are same or
475 belong to same interleaving group. Return FALSE if drs are
476 different, otherwise return TRUE. */
478 static bool
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
481 gimple stmt_i = DR_STMT (dr_i);
482 gimple stmt_j = DR_STMT (dr_j);
484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
489 return true;
490 else
491 return false;
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
497 static bool
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
504 return true;
505 else
506 return false;
509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
510 tested at run-time. Return TRUE if DDR was successfully inserted.
511 Return false if versioning is not supported. */
513 static bool
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
519 return false;
521 if (vect_print_dump_info (REPORT_DR_DETAILS))
523 fprintf (vect_dump, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525 fprintf (vect_dump, " and ");
526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
529 if (optimize_loop_nest_for_size_p (loop))
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning not supported when optimizing for size.");
533 return false;
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
537 if (loop->inner)
539 if (vect_print_dump_info (REPORT_DR_DETAILS))
540 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
541 return false;
544 /* FORNOW: We don't support creating runtime alias tests for non-constant
545 step. */
546 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
547 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
549 if (vect_print_dump_info (REPORT_DR_DETAILS))
550 fprintf (vect_dump, "versioning not yet supported for non-constant "
551 "step");
552 return false;
555 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
556 return true;
560 /* Function vect_analyze_data_ref_dependence.
562 Return TRUE if there (might) exist a dependence between a memory-reference
 563    DRA and a memory-reference DRB.  If the dependence can instead be checked
 564    at run-time by versioning for alias, return FALSE.  Adjust *MAX_VF according to
565 the data dependence. */
567 static bool
568 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
569 loop_vec_info loop_vinfo, int *max_vf)
571 unsigned int i;
572 struct loop *loop = NULL;
573 struct data_reference *dra = DDR_A (ddr);
574 struct data_reference *drb = DDR_B (ddr);
575 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
576 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
577 lambda_vector dist_v;
578 unsigned int loop_depth;
580 /* Don't bother to analyze statements marked as unvectorizable. */
581 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
582 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
583 return false;
585 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
587 /* Independent data accesses. */
588 vect_check_interleaving (dra, drb);
589 return false;
592 if (loop_vinfo)
593 loop = LOOP_VINFO_LOOP (loop_vinfo);
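  /* Two reads in a loop, or a data reference paired with itself, cannot
     create a dependence that matters for vectorization.  */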
595 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
596 return false;
598 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
600 gimple earlier_stmt;
602 if (loop_vinfo)
604 if (vect_print_dump_info (REPORT_DR_DETAILS))
606 fprintf (vect_dump, "versioning for alias required: "
607 "can't determine dependence between ");
608 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
609 fprintf (vect_dump, " and ");
610 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
613 /* Add to list of ddrs that need to be tested at run-time. */
614 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
 617       /* When vectorizing a basic block, an unknown dependence can still mean
 618          a grouped access.  */
619 if (vect_check_interleaving (dra, drb))
620 return false;
622 /* Read-read is OK (we need this check here, after checking for
623 interleaving). */
624 if (DR_IS_READ (dra) && DR_IS_READ (drb))
625 return false;
627 if (vect_print_dump_info (REPORT_DR_DETAILS))
629 fprintf (vect_dump, "can't determine dependence between ");
630 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
631 fprintf (vect_dump, " and ");
632 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
635 /* We do not vectorize basic blocks with write-write dependencies. */
636 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
637 return true;
639 /* Check that it's not a load-after-store dependence. */
640 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
641 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
642 return true;
644 return false;
647 /* Versioning for alias is not yet supported for basic block SLP, and
 648      dependence distance is inapplicable; hence, in case of known data
649 dependence, basic block vectorization is impossible for now. */
650 if (!loop_vinfo)
652 if (dra != drb && vect_check_interleaving (dra, drb))
653 return false;
655 if (vect_print_dump_info (REPORT_DR_DETAILS))
657 fprintf (vect_dump, "determined dependence between ");
658 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
659 fprintf (vect_dump, " and ");
660 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
 663       /* Do not vectorize basic blocks with write-write dependences.  */
664 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
665 return true;
667 /* Check if this dependence is allowed in basic block vectorization. */
668 return vect_drs_dependent_in_basic_block (dra, drb);
671 /* Loop-based vectorization and known data dependence. */
672 if (DDR_NUM_DIST_VECTS (ddr) == 0)
674 if (vect_print_dump_info (REPORT_DR_DETAILS))
676 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
677 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
678 fprintf (vect_dump, " and ");
679 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
681 /* Add to list of ddrs that need to be tested at run-time. */
682 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
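  /* Known loop-carried dependence: examine the distance vectors in the
     dimension of the loop being vectorized.  */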
685 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
686 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
688 int dist = dist_v[loop_depth];
690 if (vect_print_dump_info (REPORT_DR_DETAILS))
691 fprintf (vect_dump, "dependence distance = %d.", dist);
693 if (dist == 0)
695 if (vect_print_dump_info (REPORT_DR_DETAILS))
697 fprintf (vect_dump, "dependence distance == 0 between ");
698 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
699 fprintf (vect_dump, " and ");
700 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
703 /* For interleaving, mark that there is a read-write dependency if
 704              necessary.  We checked earlier that one of the data-refs is a store.  */
705 if (DR_IS_READ (dra))
706 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
707 else
709 if (DR_IS_READ (drb))
710 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
713 continue;
716 if (dist > 0 && DDR_REVERSED_P (ddr))
718 /* If DDR_REVERSED_P the order of the data-refs in DDR was
719 reversed (to make distance vector positive), and the actual
720 distance is negative. */
721 if (vect_print_dump_info (REPORT_DR_DETAILS))
722 fprintf (vect_dump, "dependence distance negative.");
723 continue;
726 if (abs (dist) >= 2
727 && abs (dist) < *max_vf)
729 /* The dependence distance requires reduction of the maximal
730 vectorization factor. */
731 *max_vf = abs (dist);
732 if (vect_print_dump_info (REPORT_DR_DETAILS))
733 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
734 *max_vf);
737 if (abs (dist) >= *max_vf)
739 /* Dependence distance does not create dependence, as far as
740 vectorization is concerned, in this case. */
741 if (vect_print_dump_info (REPORT_DR_DETAILS))
742 fprintf (vect_dump, "dependence distance >= VF.");
743 continue;
746 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
748 fprintf (vect_dump, "not vectorized, possible dependence "
749 "between data-refs ");
750 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
751 fprintf (vect_dump, " and ");
752 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
755 return true;
758 return false;
761 /* Function vect_analyze_data_ref_dependences.
763 Examine all the data references in the loop, and make sure there do not
764 exist any data dependences between them. Set *MAX_VF according to
765 the maximum vectorization factor the data dependences allow. */
767 bool
768 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
769 bb_vec_info bb_vinfo, int *max_vf)
771 unsigned int i;
772 VEC (ddr_p, heap) *ddrs = NULL;
773 struct data_dependence_relation *ddr;
775 if (vect_print_dump_info (REPORT_DETAILS))
776 fprintf (vect_dump, "=== vect_analyze_dependences ===");
778 if (loop_vinfo)
779 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
780 else
781 ddrs = BB_VINFO_DDRS (bb_vinfo);
783 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
784 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
785 return false;
787 return true;
791 /* Function vect_compute_data_ref_alignment
793 Compute the misalignment of the data reference DR.
795 Output:
796 1. If during the misalignment computation it is found that the data reference
797 cannot be vectorized then false is returned.
798 2. DR_MISALIGNMENT (DR) is defined.
800 FOR NOW: No analysis is actually performed. Misalignment is calculated
801 only for trivial cases. TODO. */
803 static bool
804 vect_compute_data_ref_alignment (struct data_reference *dr)
806 gimple stmt = DR_STMT (dr);
807 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
808 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
809 struct loop *loop = NULL;
810 tree ref = DR_REF (dr);
811 tree vectype;
812 tree base, base_addr;
813 bool base_aligned;
814 tree misalign;
815 tree aligned_to, alignment;
817 if (vect_print_dump_info (REPORT_DETAILS))
818 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
820 if (loop_vinfo)
821 loop = LOOP_VINFO_LOOP (loop_vinfo);
823 /* Initialize misalignment to unknown. */
824 SET_DR_MISALIGNMENT (dr, -1);
826 /* Strided loads perform only component accesses, misalignment information
827 is irrelevant for them. */
828 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
829 return true;
831 misalign = DR_INIT (dr);
832 aligned_to = DR_ALIGNED_TO (dr);
833 base_addr = DR_BASE_ADDRESS (dr);
834 vectype = STMT_VINFO_VECTYPE (stmt_info);
836 /* In case the dataref is in an inner-loop of the loop that is being
837 vectorized (LOOP), we use the base and misalignment information
838 relative to the outer-loop (LOOP). This is ok only if the misalignment
839 stays the same throughout the execution of the inner-loop, which is why
 840      we have to check that the stride of the dataref in the inner-loop is a
 841      multiple of the vector size.  */
842 if (loop && nested_in_vect_loop_p (loop, stmt))
844 tree step = DR_STEP (dr);
845 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
847 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
849 if (vect_print_dump_info (REPORT_ALIGNMENT))
850 fprintf (vect_dump, "inner step divides the vector-size.");
851 misalign = STMT_VINFO_DR_INIT (stmt_info);
852 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
853 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
855 else
857 if (vect_print_dump_info (REPORT_ALIGNMENT))
858 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
859 misalign = NULL_TREE;
863 /* Similarly, if we're doing basic-block vectorization, we can only use
864 base and misalignment information relative to an innermost loop if the
865 misalignment stays the same throughout the execution of the loop.
 866      As above, this is the case if the stride of the dataref is a multiple of
 867      the vector size.  */
868 if (!loop)
870 tree step = DR_STEP (dr);
871 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
873 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
875 if (vect_print_dump_info (REPORT_ALIGNMENT))
876 fprintf (vect_dump, "SLP: step doesn't divide the vector-size.");
877 misalign = NULL_TREE;
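  /* Strip the address-of to get at the underlying object; if it is a DECL we
     may be able to increase its alignment below.  */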
881 base = build_fold_indirect_ref (base_addr);
882 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
884 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
885 || !misalign)
887 if (vect_print_dump_info (REPORT_ALIGNMENT))
889 fprintf (vect_dump, "Unknown alignment for access: ");
890 print_generic_expr (vect_dump, base, TDF_SLIM);
892 return true;
895 if ((DECL_P (base)
896 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
897 alignment) >= 0)
898 || (TREE_CODE (base_addr) == SSA_NAME
899 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
900 TREE_TYPE (base_addr)))),
901 alignment) >= 0)
902 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
903 base_aligned = true;
904 else
905 base_aligned = false;
907 if (!base_aligned)
909 /* Do not change the alignment of global variables here if
910 flag_section_anchors is enabled as we already generated
911 RTL for other functions. Most global variables should
912 have been aligned during the IPA increase_alignment pass. */
913 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
914 || (TREE_STATIC (base) && flag_section_anchors))
916 if (vect_print_dump_info (REPORT_DETAILS))
918 fprintf (vect_dump, "can't force alignment of ref: ");
919 print_generic_expr (vect_dump, ref, TDF_SLIM);
921 return true;
924 /* Force the alignment of the decl.
925 NOTE: This is the only change to the code we make during
926 the analysis phase, before deciding to vectorize the loop. */
927 if (vect_print_dump_info (REPORT_DETAILS))
929 fprintf (vect_dump, "force alignment of ");
930 print_generic_expr (vect_dump, ref, TDF_SLIM);
933 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
934 DECL_USER_ALIGN (base) = 1;
937 /* At this point we assume that the base is aligned. */
938 gcc_assert (base_aligned
939 || (TREE_CODE (base) == VAR_DECL
940 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
942 /* If this is a backward running DR then first access in the larger
943 vectype actually is N-1 elements before the address in the DR.
944 Adjust misalign accordingly. */
945 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
947 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
948 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
949 otherwise we wouldn't be here. */
950 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
951 /* PLUS because DR_STEP was negative. */
952 misalign = size_binop (PLUS_EXPR, misalign, offset);
955 /* Modulo alignment. */
956 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
958 if (!host_integerp (misalign, 1))
960 /* Negative or overflowed misalignment value. */
961 if (vect_print_dump_info (REPORT_DETAILS))
962 fprintf (vect_dump, "unexpected misalign value");
963 return false;
966 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
968 if (vect_print_dump_info (REPORT_DETAILS))
970 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
971 print_generic_expr (vect_dump, ref, TDF_SLIM);
974 return true;
978 /* Function vect_compute_data_refs_alignment
980 Compute the misalignment of data references in the loop.
981 Return FALSE if a data reference is found that cannot be vectorized. */
983 static bool
984 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
985 bb_vec_info bb_vinfo)
987 VEC (data_reference_p, heap) *datarefs;
988 struct data_reference *dr;
989 unsigned int i;
991 if (loop_vinfo)
992 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
993 else
994 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
996 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
997 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
998 && !vect_compute_data_ref_alignment (dr))
1000 if (bb_vinfo)
1002 /* Mark unsupported statement as unvectorizable. */
1003 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1004 continue;
1006 else
1007 return false;
1010 return true;
1014 /* Function vect_update_misalignment_for_peel
1016 DR - the data reference whose misalignment is to be adjusted.
1017 DR_PEEL - the data reference whose misalignment is being made
1018 zero in the vector loop by the peel.
1019 NPEEL - the number of iterations in the peel loop if the misalignment
1020 of DR_PEEL is known at compile time. */
1022 static void
1023 vect_update_misalignment_for_peel (struct data_reference *dr,
1024 struct data_reference *dr_peel, int npeel)
1026 unsigned int i;
1027 VEC(dr_p,heap) *same_align_drs;
1028 struct data_reference *current_dr;
1029 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1030 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1031 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1032 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1034 /* For interleaved data accesses the step in the loop must be multiplied by
1035 the size of the interleaving group. */
1036 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1037 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1038 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
1039 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1041 /* It can be assumed that the data refs with the same alignment as dr_peel
1042 are aligned in the vector loop. */
1043 same_align_drs
1044 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1045 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1047 if (current_dr != dr)
1048 continue;
1049 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1050 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1051 SET_DR_MISALIGNMENT (dr, 0);
1052 return;
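  /* Otherwise, if both misalignments are known at compile time, adjust DR's
     misalignment by the NPEEL peeled iterations, modulo the vector alignment.  */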
1055 if (known_alignment_for_access_p (dr)
1056 && known_alignment_for_access_p (dr_peel))
1058 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1059 int misal = DR_MISALIGNMENT (dr);
1060 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1061 misal += negative ? -npeel * dr_size : npeel * dr_size;
1062 misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
1063 SET_DR_MISALIGNMENT (dr, misal);
1064 return;
1067 if (vect_print_dump_info (REPORT_DETAILS))
1068 fprintf (vect_dump, "Setting misalignment to -1.");
1069 SET_DR_MISALIGNMENT (dr, -1);
1073 /* Function vect_verify_datarefs_alignment
1075 Return TRUE if all data references in the loop can be
1076 handled with respect to alignment. */
1078 bool
1079 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1081 VEC (data_reference_p, heap) *datarefs;
1082 struct data_reference *dr;
1083 enum dr_alignment_support supportable_dr_alignment;
1084 unsigned int i;
1086 if (loop_vinfo)
1087 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1088 else
1089 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1091 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1093 gimple stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1096 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1097 continue;
1099 /* For interleaving, only the alignment of the first access matters.
1100 Skip statements marked as not vectorizable. */
1101 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
1102 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1103 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1104 continue;
1106 /* Strided loads perform only component accesses, alignment is
1107 irrelevant for them. */
1108 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1109 continue;
1111 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1112 if (!supportable_dr_alignment)
1114 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1116 if (DR_IS_READ (dr))
1117 fprintf (vect_dump,
1118 "not vectorized: unsupported unaligned load.");
1119 else
1120 fprintf (vect_dump,
1121 "not vectorized: unsupported unaligned store.");
1123 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1125 return false;
1127 if (supportable_dr_alignment != dr_aligned
1128 && vect_print_dump_info (REPORT_ALIGNMENT))
1129 fprintf (vect_dump, "Vectorizing an unaligned access.");
1131 return true;
 1134 /* Given a memory reference EXP, return whether its alignment is less
1135 than its size. */
1137 static bool
1138 not_size_aligned (tree exp)
1140 if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp)), 1))
1141 return true;
1143 return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
1144 > get_object_alignment (exp));
1147 /* Function vector_alignment_reachable_p
1149 Return true if vector alignment for DR is reachable by peeling
1150 a few loop iterations. Return false otherwise. */
1152 static bool
1153 vector_alignment_reachable_p (struct data_reference *dr)
1155 gimple stmt = DR_STMT (dr);
1156 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1157 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1159 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
 1161       /* For interleaved accesses we peel only if the number of iterations in
 1162          the prologue loop ({VF - misalignment}) is a multiple of the
 1163          number of interleaved accesses.  */
1164 int elem_size, mis_in_elements;
1165 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1167 /* FORNOW: handle only known alignment. */
1168 if (!known_alignment_for_access_p (dr))
1169 return false;
1171 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1172 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1174 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1175 return false;
 1178   /* If the misalignment is known at compile time then allow peeling
1179 only if natural alignment is reachable through peeling. */
1180 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1182 HOST_WIDE_INT elmsize =
1183 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1184 if (vect_print_dump_info (REPORT_DETAILS))
1186 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1187 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1189 if (DR_MISALIGNMENT (dr) % elmsize)
1191 if (vect_print_dump_info (REPORT_DETAILS))
1192 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1193 return false;
1197 if (!known_alignment_for_access_p (dr))
1199 tree type = TREE_TYPE (DR_REF (dr));
1200 bool is_packed = not_size_aligned (DR_REF (dr));
1201 if (vect_print_dump_info (REPORT_DETAILS))
1202 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1203 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1204 return true;
1205 else
1206 return false;
1209 return true;
1213 /* Calculate the cost of the memory access represented by DR. */
1215 static void
1216 vect_get_data_access_cost (struct data_reference *dr,
1217 unsigned int *inside_cost,
1218 unsigned int *outside_cost,
1219 stmt_vector_for_cost *body_cost_vec)
1221 gimple stmt = DR_STMT (dr);
1222 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1223 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1224 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1225 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1226 int ncopies = vf / nunits;
1228 if (DR_IS_READ (dr))
1229 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1230 NULL, body_cost_vec, false);
1231 else
1232 vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1234 if (vect_print_dump_info (REPORT_COST))
1235 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1236 "outside_cost = %d.", *inside_cost, *outside_cost);
1240 static hashval_t
1241 vect_peeling_hash (const void *elem)
1243 const struct _vect_peel_info *peel_info;
1245 peel_info = (const struct _vect_peel_info *) elem;
1246 return (hashval_t) peel_info->npeel;
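/* Equality function for the peeling hash table: two entries match when they
   describe the same number of peeled iterations.  */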
1250 static int
1251 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1253 const struct _vect_peel_info *a, *b;
1255 a = (const struct _vect_peel_info *) elem1;
1256 b = (const struct _vect_peel_info *) elem2;
1257 return (a->npeel == b->npeel);
1261 /* Insert DR into peeling hash table with NPEEL as key. */
1263 static void
1264 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1265 int npeel)
1267 struct _vect_peel_info elem, *slot;
1268 void **new_slot;
1269 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1271 elem.npeel = npeel;
1272 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1273 &elem);
1274 if (slot)
1275 slot->count++;
1276 else
1278 slot = XNEW (struct _vect_peel_info);
1279 slot->npeel = npeel;
1280 slot->dr = dr;
1281 slot->count = 1;
1282 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1283 INSERT);
1284 *new_slot = slot;
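  /* If this access is not supported when misaligned and the cost model is
     disabled, weight this peeling option heavily so that the most-frequent
     selection below prefers it.  */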
1287 if (!supportable_dr_alignment && !flag_vect_cost_model)
1288 slot->count += VECT_MAX_COST;
 1292 /* Traverse the peeling hash table to find the peeling option that aligns the
 1293    maximum number of data accesses.  */
1295 static int
1296 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1298 vect_peel_info elem = (vect_peel_info) *slot;
1299 vect_peel_extended_info max = (vect_peel_extended_info) data;
1301 if (elem->count > max->peel_info.count
1302 || (elem->count == max->peel_info.count
1303 && max->peel_info.npeel > elem->npeel))
1305 max->peel_info.npeel = elem->npeel;
1306 max->peel_info.count = elem->count;
1307 max->peel_info.dr = elem->dr;
1310 return 1;
 1314 /* Traverse the peeling hash table and calculate the cost of each peeling option.
1315 Find the one with the lowest cost. */
1317 static int
1318 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1320 vect_peel_info elem = (vect_peel_info) *slot;
1321 vect_peel_extended_info min = (vect_peel_extended_info) data;
1322 int save_misalignment, dummy;
1323 unsigned int inside_cost = 0, outside_cost = 0, i;
1324 gimple stmt = DR_STMT (elem->dr);
1325 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1326 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1327 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1328 struct data_reference *dr;
1329 stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
1330 int single_iter_cost;
1332 prologue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1333 body_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1334 epilogue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1336 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1338 stmt = DR_STMT (dr);
1339 stmt_info = vinfo_for_stmt (stmt);
1340 /* For interleaving, only the alignment of the first access
1341 matters. */
1342 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1343 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1344 continue;
1346 save_misalignment = DR_MISALIGNMENT (dr);
1347 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1348 vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
1349 &body_cost_vec);
1350 SET_DR_MISALIGNMENT (dr, save_misalignment);
1353 single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
1354 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
1355 &dummy, single_iter_cost,
1356 &prologue_cost_vec,
1357 &epilogue_cost_vec);
1359 /* Prologue and epilogue costs are added to the target model later.
1360 These costs depend only on the scalar iteration cost, the
1361 number of peeling iterations finally chosen, and the number of
1362 misaligned statements. So discard the information found here. */
1363 VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);
1364 VEC_free (stmt_info_for_cost, heap, epilogue_cost_vec);
1366 if (inside_cost < min->inside_cost
1367 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1369 min->inside_cost = inside_cost;
1370 min->outside_cost = outside_cost;
1371 VEC_free (stmt_info_for_cost, heap, min->body_cost_vec);
1372 min->body_cost_vec = body_cost_vec;
1373 min->peel_info.dr = elem->dr;
1374 min->peel_info.npeel = elem->npeel;
1376 else
1377 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1379 return 1;
1383 /* Choose best peeling option by traversing peeling hash table and either
1384 choosing an option with the lowest cost (if cost model is enabled) or the
1385 option that aligns as many accesses as possible. */
1387 static struct data_reference *
1388 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1389 unsigned int *npeel,
1390 stmt_vector_for_cost *body_cost_vec)
1392 struct _vect_peel_extended_info res;
1394 res.peel_info.dr = NULL;
1395 res.body_cost_vec = NULL;
1397 if (flag_vect_cost_model)
1399 res.inside_cost = INT_MAX;
1400 res.outside_cost = INT_MAX;
1401 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1402 vect_peeling_hash_get_lowest_cost, &res);
1404 else
1406 res.peel_info.count = 0;
1407 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1408 vect_peeling_hash_get_most_frequent, &res);
1411 *npeel = res.peel_info.npeel;
1412 *body_cost_vec = res.body_cost_vec;
1413 return res.peel_info.dr;
1417 /* Function vect_enhance_data_refs_alignment
1419 This pass will use loop versioning and loop peeling in order to enhance
1420 the alignment of data references in the loop.
1422 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1423 original loop is to be vectorized. Any other loops that are created by
1424 the transformations performed in this pass - are not supposed to be
1425 vectorized. This restriction will be relaxed.
1427 This pass will require a cost model to guide it whether to apply peeling
1428 or versioning or a combination of the two. For example, the scheme that
 1429    Intel uses when given a loop with several memory accesses, is as follows:
 1430    choose one memory access ('p') whose alignment you want to force by doing
1431 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1432 other accesses are not necessarily aligned, or (2) use loop versioning to
1433 generate one loop in which all accesses are aligned, and another loop in
1434 which only 'p' is necessarily aligned.
1436 ("Automatic Intra-Register Vectorization for the Intel Architecture",
 1437    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1438 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1440 Devising a cost model is the most critical aspect of this work. It will
1441 guide us on which access to peel for, whether to use loop versioning, how
1442 many versions to create, etc. The cost model will probably consist of
1443 generic considerations as well as target specific considerations (on
1444 powerpc for example, misaligned stores are more painful than misaligned
1445 loads).
1447 Here are the general steps involved in alignment enhancements:
1449 -- original loop, before alignment analysis:
1450 for (i=0; i<N; i++){
1451 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1452 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1455 -- After vect_compute_data_refs_alignment:
1456 for (i=0; i<N; i++){
1457 x = q[i]; # DR_MISALIGNMENT(q) = 3
1458 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1461 -- Possibility 1: we do loop versioning:
1462 if (p is aligned) {
1463 for (i=0; i<N; i++){ # loop 1A
1464 x = q[i]; # DR_MISALIGNMENT(q) = 3
1465 p[i] = y; # DR_MISALIGNMENT(p) = 0
1468 else {
1469 for (i=0; i<N; i++){ # loop 1B
1470 x = q[i]; # DR_MISALIGNMENT(q) = 3
1471 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1475 -- Possibility 2: we do loop peeling:
1476 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1477 x = q[i];
1478 p[i] = y;
1480 for (i = 3; i < N; i++){ # loop 2A
1481 x = q[i]; # DR_MISALIGNMENT(q) = 0
1482 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1485 -- Possibility 3: combination of loop peeling and versioning:
1486 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1487 x = q[i];
1488 p[i] = y;
1490 if (p is aligned) {
1491 for (i = 3; i<N; i++){ # loop 3A
1492 x = q[i]; # DR_MISALIGNMENT(q) = 0
1493 p[i] = y; # DR_MISALIGNMENT(p) = 0
1496 else {
1497 for (i = 3; i<N; i++){ # loop 3B
1498 x = q[i]; # DR_MISALIGNMENT(q) = 0
1499 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1503 These loops are later passed to loop_transform to be vectorized. The
1504 vectorizer will use the alignment information to guide the transformation
1505 (whether to generate regular loads/stores, or with special handling for
1506 misalignment). */
1508 bool
1509 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1511 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1512 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1513 enum dr_alignment_support supportable_dr_alignment;
1514 struct data_reference *dr0 = NULL, *first_store = NULL;
1515 struct data_reference *dr;
1516 unsigned int i, j;
1517 bool do_peeling = false;
1518 bool do_versioning = false;
1519 bool stat;
1520 gimple stmt;
1521 stmt_vec_info stmt_info;
1522 int vect_versioning_for_alias_required;
1523 unsigned int npeel = 0;
1524 bool all_misalignments_unknown = true;
1525 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1526 unsigned possible_npeel_number = 1;
1527 tree vectype;
1528 unsigned int nelements, mis, same_align_drs_max = 0;
1529 stmt_vector_for_cost body_cost_vec = NULL;
1531 if (vect_print_dump_info (REPORT_DETAILS))
1532 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1534 /* While cost model enhancements are expected in the future, the high level
1535 view of the code at this time is as follows:
1537 A) If there is a misaligned access then see if peeling to align
1538 this access can make all data references satisfy
1539 vect_supportable_dr_alignment. If so, update data structures
1540 as needed and return true.
1542 B) If peeling wasn't possible and there is a data reference with an
1543 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1544 then see if loop versioning checks can be used to make all data
1545 references satisfy vect_supportable_dr_alignment. If so, update
1546 data structures as needed and return true.
1548 C) If neither peeling nor versioning were successful then return false if
1549 any data reference does not satisfy vect_supportable_dr_alignment.
1551 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1553 Note, Possibility 3 above (which is peeling and versioning together) is not
1554 being done at this time. */
1556 /* (1) Peeling to force alignment. */
1558 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1559 Considerations:
1560 + How many accesses will become aligned due to the peeling
1561 - How many accesses will become unaligned due to the peeling,
1562 and the cost of misaligned accesses.
1563 - The cost of peeling (the extra runtime checks, the increase
1564 in code size). */
1566 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1568 stmt = DR_STMT (dr);
1569 stmt_info = vinfo_for_stmt (stmt);
1571 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1572 continue;
1574 /* For interleaving, only the alignment of the first access
1575 matters. */
1576 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1577 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1578 continue;
1580 /* FORNOW: Any strided load prevents peeling. The induction
1581 variable analysis will fail when the prologue loop is generated,
1582 and so we can't generate the new base for the pointer. */
1583 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1585 if (vect_print_dump_info (REPORT_DETAILS))
1586 fprintf (vect_dump, "strided load prevents peeling");
1587 do_peeling = false;
1588 break;
1591 /* For invariant accesses there is nothing to enhance. */
1592 if (integer_zerop (DR_STEP (dr)))
1593 continue;
1595 /* Strided loads perform only component accesses, alignment is
1596 irrelevant for them. */
1597 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1598 continue;
1600 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1601 do_peeling = vector_alignment_reachable_p (dr);
1602 if (do_peeling)
1604 if (known_alignment_for_access_p (dr))
1606 unsigned int npeel_tmp;
1607 bool negative = tree_int_cst_compare (DR_STEP (dr),
1608 size_zero_node) < 0;
1610 /* Save info about DR in the hash table. */
1611 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1612 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1613 htab_create (1, vect_peeling_hash,
1614 vect_peeling_hash_eq, free);
1616 vectype = STMT_VINFO_VECTYPE (stmt_info);
1617 nelements = TYPE_VECTOR_SUBPARTS (vectype);
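          /* Convert the byte misalignment into elements and compute how many
             scalar iterations must be peeled to reach an aligned access,
             modulo the number of elements in a vector.  */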
1618 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1619 TREE_TYPE (DR_REF (dr))));
1620 npeel_tmp = (negative
1621 ? (mis - nelements) : (nelements - mis))
1622 & (nelements - 1);
1624 /* For multiple types, it is possible that the bigger type access
1625 will have more than one peeling option. E.g., a loop with two
1626 types: one of size (vector size / 4), and the other one of
 1627              size (vector size / 8).  The vectorization factor will be 8.  If both
 1628              accesses are misaligned by 3, the first one needs one scalar
 1629              iteration to be aligned, and the second one needs 5.  But
 1630              the first one will also be aligned by peeling 5 scalar
 1631              iterations, and in that case both accesses will be aligned.
 1632              Hence, except for the immediate peeling amount, we also want
 1633              to try to add a full vector size, while we don't exceed the
 1634              vectorization factor.
 1635              We do this automatically for the cost model, since we calculate the cost
1636 for every peeling option. */
1637 if (!flag_vect_cost_model)
1638 possible_npeel_number = vf /nelements;
1640 /* Handle the aligned case. We may decide to align some other
1641 access, making DR unaligned. */
1642 if (DR_MISALIGNMENT (dr) == 0)
1644 npeel_tmp = 0;
1645 if (!flag_vect_cost_model)
1646 possible_npeel_number++;
1649 for (j = 0; j < possible_npeel_number; j++)
1651 gcc_assert (npeel_tmp <= vf);
1652 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1653 npeel_tmp += nelements;
1656 all_misalignments_unknown = false;
1657 /* Data-ref that was chosen for the case that all the
1658 misalignments are unknown is not relevant anymore, since we
1659 have a data-ref with known alignment. */
1660 dr0 = NULL;
1662 else
1664 /* If we don't know all the misalignment values, we prefer
 1665              peeling for the data-ref that has the maximum number of data-refs
 1666              with the same alignment, unless the target prefers to align
 1667              stores over loads.  */
1668 if (all_misalignments_unknown)
1670 if (same_align_drs_max < VEC_length (dr_p,
1671 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1672 || !dr0)
1674 same_align_drs_max = VEC_length (dr_p,
1675 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1676 dr0 = dr;
1679 if (!first_store && DR_IS_WRITE (dr))
1680 first_store = dr;
1683 /* If there are both known and unknown misaligned accesses in the
1684 loop, we choose peeling amount according to the known
1685 accesses. */
1688 if (!supportable_dr_alignment)
1690 dr0 = dr;
1691 if (!first_store && DR_IS_WRITE (dr))
1692 first_store = dr;
1696 else
1698 if (!aligned_access_p (dr))
1700 if (vect_print_dump_info (REPORT_DETAILS))
1701 fprintf (vect_dump, "vector alignment may not be reachable");
1703 break;
1708 vect_versioning_for_alias_required
1709 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1711 /* Temporarily, if versioning for alias is required, we disable peeling
1712 until we support peeling and versioning. Often peeling for alignment
1713 will require peeling for loop-bound, which in turn requires that we
1714 know how to adjust the loop ivs after the loop. */
1715 if (vect_versioning_for_alias_required
1716 || !vect_can_advance_ivs_p (loop_vinfo)
1717 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1718 do_peeling = false;
1720 if (do_peeling && all_misalignments_unknown
1721 && vect_supportable_dr_alignment (dr0, false))
 1724       /* Check whether the target prefers stores over loads, i.e., if
1725 misaligned stores are more expensive than misaligned loads (taking
1726 drs with same alignment into account). */
1727 if (first_store && DR_IS_READ (dr0))
1729 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1730 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1731 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1732 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1733 stmt_vector_for_cost dummy = VEC_alloc (stmt_info_for_cost, heap, 2);
1735 vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1736 &dummy);
1737 vect_get_data_access_cost (first_store, &store_inside_cost,
1738 &store_outside_cost, &dummy);
1740 VEC_free (stmt_info_for_cost, heap, dummy);
1742 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1743 aligning the load DR0). */
1744 load_inside_penalty = store_inside_cost;
1745 load_outside_penalty = store_outside_cost;
1746 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1747 (vinfo_for_stmt (DR_STMT (first_store))),
1748 i, dr);
1749 i++)
1750 if (DR_IS_READ (dr))
1752 load_inside_penalty += load_inside_cost;
1753 load_outside_penalty += load_outside_cost;
1755 else
1757 load_inside_penalty += store_inside_cost;
1758 load_outside_penalty += store_outside_cost;
1761 /* Calculate the penalty for leaving DR0 unaligned (by
1762 aligning the FIRST_STORE). */
1763 store_inside_penalty = load_inside_cost;
1764 store_outside_penalty = load_outside_cost;
1765 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1766 (vinfo_for_stmt (DR_STMT (dr0))),
1767 i, dr);
1768 i++)
1769 if (DR_IS_READ (dr))
1771 store_inside_penalty += load_inside_cost;
1772 store_outside_penalty += load_outside_cost;
1774 else
1776 store_inside_penalty += store_inside_cost;
1777 store_outside_penalty += store_outside_cost;
1780 if (load_inside_penalty > store_inside_penalty
1781 || (load_inside_penalty == store_inside_penalty
1782 && load_outside_penalty > store_outside_penalty))
1783 dr0 = first_store;
1786 /* In case there are only loads with different unknown misalignments, use
1787 peeling only if it may help to align other accesses in the loop. */
1788 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1789 (vinfo_for_stmt (DR_STMT (dr0))))
1790 && vect_supportable_dr_alignment (dr0, false)
1791 != dr_unaligned_supported)
1792 do_peeling = false;
1795 if (do_peeling && !dr0)
1797 /* Peeling is possible, but there is no data access that is not supported
1798 unless aligned. So we try to choose the best possible peeling. */
1800 /* We should get here only if there are drs with known misalignment. */
1801 gcc_assert (!all_misalignments_unknown);
1803 /* Choose the best peeling from the hash table. */
1804 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
1805 &body_cost_vec);
1806 if (!dr0 || !npeel)
1807 do_peeling = false;
1810 if (do_peeling)
1812 stmt = DR_STMT (dr0);
1813 stmt_info = vinfo_for_stmt (stmt);
1814 vectype = STMT_VINFO_VECTYPE (stmt_info);
1815 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1817 if (known_alignment_for_access_p (dr0))
1819 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1820 size_zero_node) < 0;
1821 if (!npeel)
1823 /* Since it's known at compile time, compute the number of
1824 iterations in the peeled loop (the peeling factor) for use in
1825 updating DR_MISALIGNMENT values. The peeling factor is the
1826 vectorization factor minus the misalignment as an element
1827 count. */
1828 mis = DR_MISALIGNMENT (dr0);
1829 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1830 npeel = ((negative ? mis - nelements : nelements - mis)
1831 & (nelements - 1));
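          /* A worked example (hypothetical numbers): for a V4SI access
             (nelements == 4, 4 byte elements) with DR_MISALIGNMENT (dr0)
             == 8 bytes and a forward step, mis == 2 elements and
             npeel == (4 - 2) & 3 == 2 peeled scalar iterations.  */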
1834 /* For interleaved data access every iteration accesses all the
1835 members of the group, therefore we divide the number of iterations
1836 by the group size. */
1837 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1838 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1839 npeel /= GROUP_SIZE (stmt_info);
1841 if (vect_print_dump_info (REPORT_DETAILS))
1842 fprintf (vect_dump, "Try peeling by %d", npeel);
1845 /* Ensure that all data refs can be vectorized after the peel. */
1846 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1848 int save_misalignment;
1850 if (dr == dr0)
1851 continue;
1853 stmt = DR_STMT (dr);
1854 stmt_info = vinfo_for_stmt (stmt);
1855 /* For interleaving, only the alignment of the first access
1856 matters. */
1857 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1858 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1859 continue;
1861 /* Strided loads perform only component accesses, alignment is
1862 irrelevant for them. */
1863 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1864 continue;
1866 save_misalignment = DR_MISALIGNMENT (dr);
1867 vect_update_misalignment_for_peel (dr, dr0, npeel);
1868 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1869 SET_DR_MISALIGNMENT (dr, save_misalignment);
1871 if (!supportable_dr_alignment)
1873 do_peeling = false;
1874 break;
1878 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1880 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1881 if (!stat)
1882 do_peeling = false;
1883 else
1885 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1886 return stat;
1890 if (do_peeling)
1892 stmt_info_for_cost *si;
1893 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
1895 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1896 If the misalignment of DR_i is identical to that of dr0 then set
1897 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1898 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1899 by the peeling factor times the element size of DR_i (MOD the
1900 vectorization factor times the size). Otherwise, the
1901 misalignment of DR_i must be set to unknown. */
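          /* For example (hypothetical numbers): with npeel == 2 and a DR_i
             of 4 byte elements whose old misalignment is 4, the new value
             is (4 + 2 * 4) modulo (vf * 4), i.e. 12 when vf == 4.  */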
1902 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1903 if (dr != dr0)
1904 vect_update_misalignment_for_peel (dr, dr0, npeel);
1906 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1907 if (npeel)
1908 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1909 else
1910 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1911 SET_DR_MISALIGNMENT (dr0, 0);
1912 if (vect_print_dump_info (REPORT_ALIGNMENT))
1913 fprintf (vect_dump, "Alignment of access forced using peeling.");
1915 if (vect_print_dump_info (REPORT_DETAILS))
1916 fprintf (vect_dump, "Peeling for alignment will be applied.");
1918 /* We've delayed passing the inside-loop peeling costs to the
1919 target cost model until we were sure peeling would happen.
1920 Do so now. */
1921 if (body_cost_vec)
1923 FOR_EACH_VEC_ELT (stmt_info_for_cost, body_cost_vec, i, si)
1925 struct _stmt_vec_info *stmt_info
1926 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1927 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1928 si->misalign, vect_body);
1930 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1933 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1934 gcc_assert (stat);
1935 return stat;
1937 else
1938 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1942 /* (2) Versioning to force alignment. */
1944 /* Try versioning if:
1945 1) flag_tree_vect_loop_version is TRUE
1946 2) optimize loop for speed
1947 3) there is at least one unsupported misaligned data ref with an unknown
1948 misalignment, and
1949 4) all misaligned data refs with a known misalignment are supported, and
1950 5) the number of runtime alignment checks is within reason. */
1952 do_versioning =
1953 flag_tree_vect_loop_version
1954 && optimize_loop_nest_for_speed_p (loop)
1955 && (!loop->inner); /* FORNOW */
1957 if (do_versioning)
1959 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1961 stmt = DR_STMT (dr);
1962 stmt_info = vinfo_for_stmt (stmt);
1964 /* For interleaving, only the alignment of the first access
1965 matters. */
1966 if (aligned_access_p (dr)
1967 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1968 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1969 continue;
1971 /* Strided loads perform only component accesses, alignment is
1972 irrelevant for them. */
1973 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1974 continue;
1976 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1978 if (!supportable_dr_alignment)
1980 gimple stmt;
1981 int mask;
1982 tree vectype;
1984 if (known_alignment_for_access_p (dr)
1985 || VEC_length (gimple,
1986 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1987 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1989 do_versioning = false;
1990 break;
1993 stmt = DR_STMT (dr);
1994 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1995 gcc_assert (vectype);
1997 /* The rightmost bits of an aligned address must be zeros.
1998 Construct the mask needed for this test. For example,
1999 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2000 mask must be 15 = 0xf. */
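	  /* Conceptually (a sketch, not the exact code emitted later), the
	     runtime guard produced by versioning tests
	       ((addr_1 | addr_2 | ... | addr_n) & mask) == 0
	     and branches to the vectorized loop only when it holds.  */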
2001 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2003 /* FORNOW: use the same mask to test all potentially unaligned
2004 references in the loop. The vectorizer currently supports
2005 a single vector size, see the reference to
2006 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2007 vectorization factor is computed. */
2008 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2009 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2010 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2011 VEC_safe_push (gimple, heap,
2012 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2013 DR_STMT (dr));
2017 /* Versioning requires at least one misaligned data reference. */
2018 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2019 do_versioning = false;
2020 else if (!do_versioning)
2021 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2024 if (do_versioning)
2026 VEC(gimple,heap) *may_misalign_stmts
2027 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2028 gimple stmt;
2030 /* It can now be assumed that the data references in the statements
2031 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2032 of the loop being vectorized. */
2033 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
2035 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2036 dr = STMT_VINFO_DATA_REF (stmt_info);
2037 SET_DR_MISALIGNMENT (dr, 0);
2038 if (vect_print_dump_info (REPORT_ALIGNMENT))
2039 fprintf (vect_dump, "Alignment of access forced using versioning.");
2042 if (vect_print_dump_info (REPORT_DETAILS))
2043 fprintf (vect_dump, "Versioning for alignment will be applied.");
2045 /* Peeling and versioning can't be done together at this time. */
2046 gcc_assert (! (do_peeling && do_versioning));
2048 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2049 gcc_assert (stat);
2050 return stat;
2053 /* This point is reached if neither peeling nor versioning is being done. */
2054 gcc_assert (! (do_peeling || do_versioning));
2056 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2057 return stat;
2061 /* Function vect_find_same_alignment_drs.
2063 Update group and alignment relations according to the chosen
2064 vectorization factor. */
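/* For illustration (a hypothetical example): with 4 byte elements and a
   vectorization factor of 4, the accesses a[i] and a[i+4] have dependence
   distance 4; since 4 % vf == 0 and the element sizes match, the two
   data-refs are recorded below as having the same alignment.  */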
2066 static void
2067 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
2068 loop_vec_info loop_vinfo)
2070 unsigned int i;
2071 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2072 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2073 struct data_reference *dra = DDR_A (ddr);
2074 struct data_reference *drb = DDR_B (ddr);
2075 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2076 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2077 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
2078 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
2079 lambda_vector dist_v;
2080 unsigned int loop_depth;
2082 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2083 return;
2085 if (dra == drb)
2086 return;
2088 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2089 return;
2091 /* Loop-based vectorization and known data dependence. */
2092 if (DDR_NUM_DIST_VECTS (ddr) == 0)
2093 return;
2095 /* Data-dependence analysis reports a distance vector of zero
2096 for data-references that overlap only in the first iteration
2097 but have steps of different sign (see PR45764).
2098 So as a sanity check require equal DR_STEP. */
2099 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2100 return;
2102 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
2103 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
2105 int dist = dist_v[loop_depth];
2107 if (vect_print_dump_info (REPORT_DR_DETAILS))
2108 fprintf (vect_dump, "dependence distance = %d.", dist);
2110 /* Same loop iteration. */
2111 if (dist == 0
2112 || (dist % vectorization_factor == 0 && dra_size == drb_size))
2114 /* Two references with distance zero have the same alignment. */
2115 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
2116 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
2117 if (vect_print_dump_info (REPORT_ALIGNMENT))
2118 fprintf (vect_dump, "accesses have the same alignment.");
2119 if (vect_print_dump_info (REPORT_DR_DETAILS))
2121 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
2122 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
2123 fprintf (vect_dump, " and ");
2124 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
2131 /* Function vect_analyze_data_refs_alignment
2133 Analyze the alignment of the data-references in the loop.
2134 Return FALSE if a data reference is found that cannot be vectorized. */
2136 bool
2137 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2138 bb_vec_info bb_vinfo)
2140 if (vect_print_dump_info (REPORT_DETAILS))
2141 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2143 /* Mark groups of data references with same alignment using
2144 data dependence information. */
2145 if (loop_vinfo)
2147 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2148 struct data_dependence_relation *ddr;
2149 unsigned int i;
2151 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2152 vect_find_same_alignment_drs (ddr, loop_vinfo);
2155 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2157 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2158 fprintf (vect_dump,
2159 "not vectorized: can't calculate alignment for data ref.");
2160 return false;
2163 return true;
2167 /* Analyze groups of accesses: check that DR belongs to a group of
2168 accesses of legal size, step, etc. Detect gaps, single element
2169 interleaving, and other special cases. Set grouped access info.
2170 Collect groups of strided stores for further use in SLP analysis. */
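/* For illustration (a hypothetical example): in

     for (i = 0; i < N; i++)
       {
         a[2*i]   = x[i];
         a[2*i+1] = y[i];
       }

   the two stores form an interleaved group with GROUP_SIZE 2 and a step
   of 2 * sizeof (a[0]) bytes.  */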
2172 static bool
2173 vect_analyze_group_access (struct data_reference *dr)
2175 tree step = DR_STEP (dr);
2176 tree scalar_type = TREE_TYPE (DR_REF (dr));
2177 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2178 gimple stmt = DR_STMT (dr);
2179 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2180 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2181 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2182 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2183 HOST_WIDE_INT groupsize, last_accessed_element = 1;
2184 bool slp_impossible = false;
2185 struct loop *loop = NULL;
2187 if (loop_vinfo)
2188 loop = LOOP_VINFO_LOOP (loop_vinfo);
2190 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2191 size of the interleaving group (including gaps). */
2192 groupsize = dr_step / type_size;
2194 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2195 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2197 /* Check if this DR is part of an interleaving group of which it is
2198 the only element accessed in the loop. */
2200 /* Gaps are supported only for loads. STEP must be a multiple of the type
2201 size. The size of the group must be a power of 2. */
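	  /* For example (hypothetical): a load "... = a[4*i]" of a 4 byte
	     type has step 16, a multiple of the type size, and groupsize 4,
	     a power of two, so it is accepted below as single element
	     interleaving with gaps.  */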
2202 if (DR_IS_READ (dr)
2203 && (dr_step % type_size) == 0
2204 && groupsize > 0
2205 && exact_log2 (groupsize) != -1)
2207 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2208 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2209 if (vect_print_dump_info (REPORT_DR_DETAILS))
2211 fprintf (vect_dump, "Detected single element interleaving ");
2212 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2213 fprintf (vect_dump, " step ");
2214 print_generic_expr (vect_dump, step, TDF_SLIM);
2217 if (loop_vinfo)
2219 if (vect_print_dump_info (REPORT_DETAILS))
2220 fprintf (vect_dump, "Data access with gaps requires scalar "
2221 "epilogue loop");
2222 if (loop->inner)
2224 if (vect_print_dump_info (REPORT_DETAILS))
2225 fprintf (vect_dump, "Peeling for outer loop is not"
2226 " supported");
2227 return false;
2230 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2233 return true;
2236 if (vect_print_dump_info (REPORT_DETAILS))
2238 fprintf (vect_dump, "not consecutive access ");
2239 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2242 if (bb_vinfo)
2244 /* Mark the statement as unvectorizable. */
2245 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2246 return true;
2249 return false;
2252 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2254 /* First stmt in the interleaving chain. Check the chain. */
2255 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2256 struct data_reference *data_ref = dr;
2257 unsigned int count = 1;
2258 tree next_step;
2259 tree prev_init = DR_INIT (data_ref);
2260 gimple prev = stmt;
2261 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2263 while (next)
2265 /* Skip identical data-refs. In case two or more stmts share a
2266 data-ref (supported only for loads), we vectorize only the first
2267 stmt, and the rest get their vectorized loads from the first
2268 one. */
2269 if (!tree_int_cst_compare (DR_INIT (data_ref),
2270 DR_INIT (STMT_VINFO_DATA_REF (
2271 vinfo_for_stmt (next)))))
2273 if (DR_IS_WRITE (data_ref))
2275 if (vect_print_dump_info (REPORT_DETAILS))
2276 fprintf (vect_dump, "Two store stmts share the same dr.");
2277 return false;
2280 /* Check that there are no load-store dependencies for these loads,
2281 to prevent a load-store-load sequence to the same location. */
2282 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2283 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2285 if (vect_print_dump_info (REPORT_DETAILS))
2286 fprintf (vect_dump,
2287 "READ_WRITE dependence in interleaving.");
2288 return false;
2291 /* For loads, record that NEXT reuses the data-ref load of PREV. */
2292 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2294 prev = next;
2295 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2296 continue;
2299 prev = next;
2301 /* Check that all the accesses have the same STEP. */
2302 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2303 if (tree_int_cst_compare (step, next_step))
2305 if (vect_print_dump_info (REPORT_DETAILS))
2306 fprintf (vect_dump, "not consecutive access in interleaving");
2307 return false;
2310 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2311 /* Check that the distance between two accesses is equal to the type
2312 size. Otherwise, we have gaps. */
2313 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2314 - TREE_INT_CST_LOW (prev_init)) / type_size;
2315 if (diff != 1)
2317 /* FORNOW: SLP of accesses with gaps is not supported. */
2318 slp_impossible = true;
2319 if (DR_IS_WRITE (data_ref))
2321 if (vect_print_dump_info (REPORT_DETAILS))
2322 fprintf (vect_dump, "interleaved store with gaps");
2323 return false;
2326 gaps += diff - 1;
2329 last_accessed_element += diff;
2331 /* Store the gap from the previous member of the group. If there is no
2332 gap in the access, GROUP_GAP is always 1. */
2333 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2335 prev_init = DR_INIT (data_ref);
2336 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2337 /* Count the number of data-refs in the chain. */
2338 count++;
2341 /* COUNT is the number of accesses found; we multiply it by the size of
2342 the type to get COUNT_IN_BYTES. */
2343 count_in_bytes = type_size * count;
2345 /* Check that the size of the interleaving (including gaps) is not
2346 greater than STEP. */
2347 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2349 if (vect_print_dump_info (REPORT_DETAILS))
2351 fprintf (vect_dump, "interleaving size is greater than step for ");
2352 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2354 return false;
2357 /* Check that the size of the interleaving is equal to STEP for stores,
2358 i.e., that there are no gaps. */
2359 if (dr_step && dr_step != count_in_bytes)
2361 if (DR_IS_READ (dr))
2363 slp_impossible = true;
2364 /* There is a gap after the last load in the group. This gap is a
2365 difference between the groupsize and the number of elements.
2366 When there is no gap, this difference should be 0. */
2367 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
2369 else
2371 if (vect_print_dump_info (REPORT_DETAILS))
2372 fprintf (vect_dump, "interleaved store with gaps");
2373 return false;
2377 /* Check that STEP is a multiple of type size. */
2378 if (dr_step && (dr_step % type_size) != 0)
2380 if (vect_print_dump_info (REPORT_DETAILS))
2382 fprintf (vect_dump, "step is not a multiple of type size: step ");
2383 print_generic_expr (vect_dump, step, TDF_SLIM);
2384 fprintf (vect_dump, " size ");
2385 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2386 TDF_SLIM);
2388 return false;
2391 if (groupsize == 0)
2392 groupsize = count;
2394 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2395 if (vect_print_dump_info (REPORT_DETAILS))
2396 fprintf (vect_dump, "Detected interleaving of size %d", (int)groupsize);
2398 /* SLP: create an SLP data structure for every interleaving group of
2399 stores for further analysis in vect_analyze_slp. */
2400 if (DR_IS_WRITE (dr) && !slp_impossible)
2402 if (loop_vinfo)
2403 VEC_safe_push (gimple, heap, LOOP_VINFO_GROUPED_STORES (loop_vinfo),
2404 stmt);
2405 if (bb_vinfo)
2406 VEC_safe_push (gimple, heap, BB_VINFO_GROUPED_STORES (bb_vinfo),
2407 stmt);
2410 /* There is a gap at the end of the group. */
2411 if (groupsize - last_accessed_element > 0 && loop_vinfo)
2413 if (vect_print_dump_info (REPORT_DETAILS))
2414 fprintf (vect_dump, "Data access with gaps requires scalar "
2415 "epilogue loop");
2416 if (loop->inner)
2418 if (vect_print_dump_info (REPORT_DETAILS))
2419 fprintf (vect_dump, "Peeling for outer loop is not supported");
2420 return false;
2423 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2427 return true;
2431 /* Analyze the access pattern of the data-reference DR.
2432 In case of non-consecutive accesses call vect_analyze_group_access() to
2433 analyze groups of accesses. */
2435 static bool
2436 vect_analyze_data_ref_access (struct data_reference *dr)
2438 tree step = DR_STEP (dr);
2439 tree scalar_type = TREE_TYPE (DR_REF (dr));
2440 gimple stmt = DR_STMT (dr);
2441 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2442 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2443 struct loop *loop = NULL;
2445 if (loop_vinfo)
2446 loop = LOOP_VINFO_LOOP (loop_vinfo);
2448 if (loop_vinfo && !step)
2450 if (vect_print_dump_info (REPORT_DETAILS))
2451 fprintf (vect_dump, "bad data-ref access in loop");
2452 return false;
2455 /* Allow invariant loads in loops. */
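      /* E.g. (hypothetical) "... = *p" with loop-invariant P has DR_STEP
	 zero; such a load is accepted here, a zero-step store is not.  */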
2456 if (loop_vinfo && integer_zerop (step))
2458 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2459 return DR_IS_READ (dr);
2462 if (loop && nested_in_vect_loop_p (loop, stmt))
2464 /* Interleaved accesses are not yet supported within outer-loop
2465 vectorization for references in the inner-loop. */
2466 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2468 /* For the rest of the analysis we use the outer-loop step. */
2469 step = STMT_VINFO_DR_STEP (stmt_info);
2470 if (integer_zerop (step))
2472 if (vect_print_dump_info (REPORT_ALIGNMENT))
2473 fprintf (vect_dump, "zero step in outer loop.");
2474 if (DR_IS_READ (dr))
2475 return true;
2476 else
2477 return false;
2481 /* Consecutive? */
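  /* E.g. (hypothetical) for 4 byte elements, a[i] has step 4 ==
     TYPE_SIZE_UNIT and a[-i] has step -4; both are treated as
     consecutive, non-interleaving accesses below.  */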
2482 if (TREE_CODE (step) == INTEGER_CST)
2484 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2485 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2486 || (dr_step < 0
2487 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2489 /* Mark that it is not interleaving. */
2490 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2491 return true;
2495 if (loop && nested_in_vect_loop_p (loop, stmt))
2497 if (vect_print_dump_info (REPORT_ALIGNMENT))
2498 fprintf (vect_dump, "grouped access in outer loop.");
2499 return false;
2502 /* Assume this is a DR handled by the non-constant strided load case. */
2503 if (TREE_CODE (step) != INTEGER_CST)
2504 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2506 /* Non-consecutive access - check if it's part of an interleaving group. */
2507 return vect_analyze_group_access (dr);
2511 /* Function vect_analyze_data_ref_accesses.
2513 Analyze the access pattern of all the data references in the loop.
2515 FORNOW: the only access pattern that is considered vectorizable is a
2516 simple step 1 (consecutive) access.
2518 FORNOW: handle only arrays and pointer accesses. */
2520 bool
2521 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2523 unsigned int i;
2524 VEC (data_reference_p, heap) *datarefs;
2525 struct data_reference *dr;
2527 if (vect_print_dump_info (REPORT_DETAILS))
2528 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2530 if (loop_vinfo)
2531 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2532 else
2533 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2535 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2536 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2537 && !vect_analyze_data_ref_access (dr))
2539 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2540 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2542 if (bb_vinfo)
2544 /* Mark the statement as not vectorizable. */
2545 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2546 continue;
2548 else
2549 return false;
2552 return true;
2555 /* Function vect_prune_runtime_alias_test_list.
2557 Prune a list of ddrs to be tested at run-time by versioning for alias.
2558 Return FALSE if the resulting list of ddrs is longer than allowed by
2559 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2561 bool
2562 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2564 VEC (ddr_p, heap) * ddrs =
2565 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2566 unsigned i, j;
2568 if (vect_print_dump_info (REPORT_DETAILS))
2569 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2571 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2573 bool found;
2574 ddr_p ddr_i;
2576 ddr_i = VEC_index (ddr_p, ddrs, i);
2577 found = false;
2579 for (j = 0; j < i; j++)
2581 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2583 if (vect_vfa_range_equal (ddr_i, ddr_j))
2585 if (vect_print_dump_info (REPORT_DR_DETAILS))
2587 fprintf (vect_dump, "found equal ranges ");
2588 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2589 fprintf (vect_dump, ", ");
2590 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2591 fprintf (vect_dump, " and ");
2592 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2593 fprintf (vect_dump, ", ");
2594 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2596 found = true;
2597 break;
2601 if (found)
2603 VEC_ordered_remove (ddr_p, ddrs, i);
2604 continue;
2606 i++;
2609 if (VEC_length (ddr_p, ddrs) >
2610 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2612 if (vect_print_dump_info (REPORT_DR_DETAILS))
2614 fprintf (vect_dump,
2615 "disable versioning for alias - max number of generated "
2616 "checks exceeded.");
2619 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2621 return false;
2624 return true;
2627 /* Check whether a non-affine read in STMT is suitable for a gather load
2628 and if so, return a builtin decl for that operation. */
2630 tree
2631 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2632 tree *offp, int *scalep)
2634 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2635 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2636 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2637 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2638 tree offtype = NULL_TREE;
2639 tree decl, base, off;
2640 enum machine_mode pmode;
2641 int punsignedp, pvolatilep;
2643 /* The gather builtins need addresses of the form
2644 loop_invariant + vector * {1, 2, 4, 8}
2646 or loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2647 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2648 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2649 multiplications and additions in it. To get a vector, we need
2650 a single SSA_NAME that will be defined in the loop and will
2651 contain everything that is not loop invariant and that can be
2652 vectorized. The following code attempts to find such a preexisting
2653 SSA_NAME OFF and put the loop invariants into a tree BASE
2654 that can be gimplified before the loop. */
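  /* For illustration (a hypothetical example): for a read "... = a[_1]"
     with _1 = b[i] and 4 byte elements of a, the address is
     &a + (sizetype) _1 * 4, so the code below ends up with BASE == &a,
     OFF == _1 and SCALE == 4.  */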
2655 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2656 &pmode, &punsignedp, &pvolatilep, false);
2657 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2659 if (TREE_CODE (base) == MEM_REF)
2661 if (!integer_zerop (TREE_OPERAND (base, 1)))
2663 if (off == NULL_TREE)
2665 double_int moff = mem_ref_offset (base);
2666 off = double_int_to_tree (sizetype, moff);
2668 else
2669 off = size_binop (PLUS_EXPR, off,
2670 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2672 base = TREE_OPERAND (base, 0);
2674 else
2675 base = build_fold_addr_expr (base);
2677 if (off == NULL_TREE)
2678 off = size_zero_node;
2680 /* If base is not loop invariant, then if off is 0 we start with just
2681 the constant offset in the loop invariant BASE and continue with base
2682 as OFF; otherwise give up.
2683 We could handle that case by gimplifying the addition of base + off
2684 into some SSA_NAME and use that as off, but for now punt. */
2685 if (!expr_invariant_in_loop_p (loop, base))
2687 if (!integer_zerop (off))
2688 return NULL_TREE;
2689 off = base;
2690 base = size_int (pbitpos / BITS_PER_UNIT);
2692 /* Otherwise put base + constant offset into the loop invariant BASE
2693 and continue with OFF. */
2694 else
2696 base = fold_convert (sizetype, base);
2697 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2700 /* OFF at this point may be either a SSA_NAME or some tree expression
2701 from get_inner_reference. Try to peel off loop invariants from it
2702 into BASE as long as possible. */
2703 STRIP_NOPS (off);
2704 while (offtype == NULL_TREE)
2706 enum tree_code code;
2707 tree op0, op1, add = NULL_TREE;
2709 if (TREE_CODE (off) == SSA_NAME)
2711 gimple def_stmt = SSA_NAME_DEF_STMT (off);
2713 if (expr_invariant_in_loop_p (loop, off))
2714 return NULL_TREE;
2716 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2717 break;
2719 op0 = gimple_assign_rhs1 (def_stmt);
2720 code = gimple_assign_rhs_code (def_stmt);
2721 op1 = gimple_assign_rhs2 (def_stmt);
2723 else
2725 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2726 return NULL_TREE;
2727 code = TREE_CODE (off);
2728 extract_ops_from_tree (off, &code, &op0, &op1);
2730 switch (code)
2732 case POINTER_PLUS_EXPR:
2733 case PLUS_EXPR:
2734 if (expr_invariant_in_loop_p (loop, op0))
2736 add = op0;
2737 off = op1;
2738 do_add:
2739 add = fold_convert (sizetype, add);
2740 if (scale != 1)
2741 add = size_binop (MULT_EXPR, add, size_int (scale));
2742 base = size_binop (PLUS_EXPR, base, add);
2743 continue;
2745 if (expr_invariant_in_loop_p (loop, op1))
2747 add = op1;
2748 off = op0;
2749 goto do_add;
2751 break;
2752 case MINUS_EXPR:
2753 if (expr_invariant_in_loop_p (loop, op1))
2755 add = fold_convert (sizetype, op1);
2756 add = size_binop (MINUS_EXPR, size_zero_node, add);
2757 off = op0;
2758 goto do_add;
2760 break;
2761 case MULT_EXPR:
2762 if (scale == 1 && host_integerp (op1, 0))
2764 scale = tree_low_cst (op1, 0);
2765 off = op0;
2766 continue;
2768 break;
2769 case SSA_NAME:
2770 off = op0;
2771 continue;
2772 CASE_CONVERT:
2773 if (!POINTER_TYPE_P (TREE_TYPE (op0))
2774 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2775 break;
2776 if (TYPE_PRECISION (TREE_TYPE (op0))
2777 == TYPE_PRECISION (TREE_TYPE (off)))
2779 off = op0;
2780 continue;
2782 if (TYPE_PRECISION (TREE_TYPE (op0))
2783 < TYPE_PRECISION (TREE_TYPE (off)))
2785 off = op0;
2786 offtype = TREE_TYPE (off);
2787 STRIP_NOPS (off);
2788 continue;
2790 break;
2791 default:
2792 break;
2794 break;
2797 /* If at the end OFF still isn't a SSA_NAME or isn't
2798 defined in the loop, punt. */
2799 if (TREE_CODE (off) != SSA_NAME
2800 || expr_invariant_in_loop_p (loop, off))
2801 return NULL_TREE;
2803 if (offtype == NULL_TREE)
2804 offtype = TREE_TYPE (off);
2806 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2807 offtype, scale);
2808 if (decl == NULL_TREE)
2809 return NULL_TREE;
2811 if (basep)
2812 *basep = base;
2813 if (offp)
2814 *offp = off;
2815 if (scalep)
2816 *scalep = scale;
2817 return decl;
2820 /* Check whether a non-affine load in STMT (being in the loop referred to
2821 in LOOP_VINFO) is suitable for handling as a strided load. That is the case
2822 if its address is a simple induction variable. If so return the base
2823 of that induction variable in *BASEP and the (loop-invariant) step
2824 in *STEPP, both only when that pointer is non-zero.
2826 This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
2827 base pointer) only. */
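/* For illustration (a hypothetical example): in

     for (i = 0; i < n; i++)
       sum += a[i * stride];

   with loop-invariant STRIDE the ARRAY_REF index is a simple induction
   variable, so *BASEP is its initial value and *STEPP is STRIDE.  */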
2829 bool
2830 vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2831 tree *stepp)
2833 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2834 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2835 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2836 tree base, off;
2837 affine_iv iv;
2839 if (!DR_IS_READ (dr))
2840 return false;
2842 base = DR_REF (dr);
2844 if (TREE_CODE (base) == ARRAY_REF)
2846 off = TREE_OPERAND (base, 1);
2847 base = TREE_OPERAND (base, 0);
2849 else if (TREE_CODE (base) == MEM_REF)
2851 off = TREE_OPERAND (base, 0);
2852 base = TREE_OPERAND (base, 1);
2854 else
2855 return false;
2857 if (TREE_CODE (off) != SSA_NAME)
2858 return false;
2860 if (!expr_invariant_in_loop_p (loop, base)
2861 || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
2862 return false;
2864 if (basep)
2865 *basep = iv.base;
2866 if (stepp)
2867 *stepp = iv.step;
2868 return true;
2871 /* Function vect_analyze_data_refs.
2873 Find all the data references in the loop or basic block.
2875 The general structure of the analysis of data refs in the vectorizer is as
2876 follows:
2877 1- vect_analyze_data_refs(loop/bb): call
2878 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2879 in the loop/bb and their dependences.
2880 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2881 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2882 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2886 bool
2887 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2888 bb_vec_info bb_vinfo,
2889 int *min_vf)
2891 struct loop *loop = NULL;
2892 basic_block bb = NULL;
2893 unsigned int i;
2894 VEC (data_reference_p, heap) *datarefs;
2895 struct data_reference *dr;
2896 tree scalar_type;
2897 bool res, stop_bb_analysis = false;
2899 if (vect_print_dump_info (REPORT_DETAILS))
2900 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2902 if (loop_vinfo)
2904 loop = LOOP_VINFO_LOOP (loop_vinfo);
2905 res = compute_data_dependences_for_loop
2906 (loop, true,
2907 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2908 &LOOP_VINFO_DATAREFS (loop_vinfo),
2909 &LOOP_VINFO_DDRS (loop_vinfo));
2911 if (!res)
2913 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2914 fprintf (vect_dump, "not vectorized: loop contains function calls"
2915 " or data references that cannot be analyzed");
2916 return false;
2919 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2921 else
2923 gimple_stmt_iterator gsi;
2925 bb = BB_VINFO_BB (bb_vinfo);
2926 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2928 gimple stmt = gsi_stmt (gsi);
2929 if (!find_data_references_in_stmt (NULL, stmt,
2930 &BB_VINFO_DATAREFS (bb_vinfo)))
2932 /* Mark the rest of the basic-block as unvectorizable. */
2933 for (; !gsi_end_p (gsi); gsi_next (&gsi))
2935 stmt = gsi_stmt (gsi);
2936 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
2938 break;
2941 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
2942 &BB_VINFO_DDRS (bb_vinfo), NULL, true))
2944 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2945 fprintf (vect_dump, "not vectorized: basic block contains function"
2946 " calls or data references that cannot be analyzed");
2947 return false;
2950 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2953 /* Go through the data-refs, check that the analysis succeeded. Update
2954 pointer from stmt_vec_info struct to DR and vectype. */
2956 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2958 gimple stmt;
2959 stmt_vec_info stmt_info;
2960 tree base, offset, init;
2961 bool gather = false;
2962 int vf;
2964 if (!dr || !DR_REF (dr))
2966 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2967 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2969 return false;
2972 stmt = DR_STMT (dr);
2973 stmt_info = vinfo_for_stmt (stmt);
2975 if (stop_bb_analysis)
2977 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2978 continue;
2981 /* Check that analysis of the data-ref succeeded. */
2982 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2983 || !DR_STEP (dr))
2985 /* If the target supports vector gather loads, see if they can
2986 be used. */
2987 if (loop_vinfo
2988 && DR_IS_READ (dr)
2989 && !TREE_THIS_VOLATILE (DR_REF (dr))
2990 && targetm.vectorize.builtin_gather != NULL
2991 && !nested_in_vect_loop_p (loop, stmt))
2993 struct data_reference *newdr
2994 = create_data_ref (NULL, loop_containing_stmt (stmt),
2995 DR_REF (dr), stmt, true);
2996 gcc_assert (newdr != NULL && DR_REF (newdr));
2997 if (DR_BASE_ADDRESS (newdr)
2998 && DR_OFFSET (newdr)
2999 && DR_INIT (newdr)
3000 && DR_STEP (newdr)
3001 && integer_zerop (DR_STEP (newdr)))
3003 dr = newdr;
3004 gather = true;
3006 else
3007 free_data_ref (newdr);
3010 if (!gather)
3012 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3014 fprintf (vect_dump, "not vectorized: data ref analysis "
3015 "failed ");
3016 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3019 if (bb_vinfo)
3021 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3022 stop_bb_analysis = true;
3023 continue;
3026 return false;
3030 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3032 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3033 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3034 "constant");
3036 if (bb_vinfo)
3038 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3039 stop_bb_analysis = true;
3040 continue;
3043 if (gather)
3044 free_data_ref (dr);
3045 return false;
3048 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3050 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3052 fprintf (vect_dump, "not vectorized: volatile type ");
3053 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3056 if (bb_vinfo)
3058 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3059 stop_bb_analysis = true;
3060 continue;
3063 return false;
3066 if (stmt_can_throw_internal (stmt))
3068 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3070 fprintf (vect_dump, "not vectorized: statement can throw an "
3071 "exception ");
3072 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3075 if (bb_vinfo)
3077 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3078 stop_bb_analysis = true;
3079 continue;
3082 if (gather)
3083 free_data_ref (dr);
3084 return false;
3087 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3088 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3090 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3092 fprintf (vect_dump, "not vectorized: statement is bitfield "
3093 "access ");
3094 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3097 if (bb_vinfo)
3099 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3100 stop_bb_analysis = true;
3101 continue;
3104 if (gather)
3105 free_data_ref (dr);
3106 return false;
3109 base = unshare_expr (DR_BASE_ADDRESS (dr));
3110 offset = unshare_expr (DR_OFFSET (dr));
3111 init = unshare_expr (DR_INIT (dr));
3113 if (is_gimple_call (stmt))
3115 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3117 fprintf (vect_dump, "not vectorized: dr in a call ");
3118 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3121 if (bb_vinfo)
3123 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3124 stop_bb_analysis = true;
3125 continue;
3128 if (gather)
3129 free_data_ref (dr);
3130 return false;
3133 /* Update DR field in stmt_vec_info struct. */
3135 /* If the dataref is in an inner-loop of the loop that is considered for
3136 vectorization, we also want to analyze the access relative to
3137 the outer-loop (DR contains information only relative to the
3138 inner-most enclosing loop). We do that by building a reference to the
3139 first location accessed by the inner-loop, and analyze it relative to
3140 the outer-loop. */
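	  /* For illustration (a hypothetical example): for in[i*M + j]
	     accessed in a j-loop nested in an i-loop, with 4 byte elements,
	     the inner-loop step is 4 while, relative to the outer loop, the
	     first location is &in[i*M] and the step is M * 4 bytes per
	     outer iteration.  */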
3141 if (loop && nested_in_vect_loop_p (loop, stmt))
3143 tree outer_step, outer_base, outer_init;
3144 HOST_WIDE_INT pbitsize, pbitpos;
3145 tree poffset;
3146 enum machine_mode pmode;
3147 int punsignedp, pvolatilep;
3148 affine_iv base_iv, offset_iv;
3149 tree dinit;
3151 /* Build a reference to the first location accessed by the
3152 inner-loop: *(BASE+INIT). (The first location is actually
3153 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3154 tree inner_base = build_fold_indirect_ref
3155 (fold_build_pointer_plus (base, init));
3157 if (vect_print_dump_info (REPORT_DETAILS))
3159 fprintf (vect_dump, "analyze in outer-loop: ");
3160 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
3163 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3164 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3165 gcc_assert (outer_base != NULL_TREE);
3167 if (pbitpos % BITS_PER_UNIT != 0)
3169 if (vect_print_dump_info (REPORT_DETAILS))
3170 fprintf (vect_dump, "failed: bit offset alignment.\n");
3171 return false;
3174 outer_base = build_fold_addr_expr (outer_base);
3175 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3176 &base_iv, false))
3178 if (vect_print_dump_info (REPORT_DETAILS))
3179 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
3180 return false;
3183 if (offset)
3185 if (poffset)
3186 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
3187 poffset);
3188 else
3189 poffset = offset;
3192 if (!poffset)
3194 offset_iv.base = ssize_int (0);
3195 offset_iv.step = ssize_int (0);
3197 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3198 &offset_iv, false))
3200 if (vect_print_dump_info (REPORT_DETAILS))
3201 fprintf (vect_dump, "evolution of offset is not affine.\n");
3202 return false;
3205 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3206 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3207 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3208 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3209 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3211 outer_step = size_binop (PLUS_EXPR,
3212 fold_convert (ssizetype, base_iv.step),
3213 fold_convert (ssizetype, offset_iv.step));
3215 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3216 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3217 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3218 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3219 STMT_VINFO_DR_OFFSET (stmt_info) =
3220 fold_convert (ssizetype, offset_iv.base);
3221 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3222 size_int (highest_pow2_factor (offset_iv.base));
3224 if (vect_print_dump_info (REPORT_DETAILS))
3226 fprintf (vect_dump, "\touter base_address: ");
3227 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3228 fprintf (vect_dump, "\n\touter offset from base address: ");
3229 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3230 fprintf (vect_dump, "\n\touter constant offset from base address: ");
3231 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3232 fprintf (vect_dump, "\n\touter step: ");
3233 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3234 fprintf (vect_dump, "\n\touter aligned to: ");
3235 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3239 if (STMT_VINFO_DATA_REF (stmt_info))
3241 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3243 fprintf (vect_dump,
3244 "not vectorized: more than one data ref in stmt: ");
3245 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3248 if (bb_vinfo)
3250 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3251 stop_bb_analysis = true;
3252 continue;
3255 if (gather)
3256 free_data_ref (dr);
3257 return false;
3260 STMT_VINFO_DATA_REF (stmt_info) = dr;
3262 /* Set vectype for STMT. */
3263 scalar_type = TREE_TYPE (DR_REF (dr));
3264 STMT_VINFO_VECTYPE (stmt_info) =
3265 get_vectype_for_scalar_type (scalar_type);
3266 if (!STMT_VINFO_VECTYPE (stmt_info))
3268 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3270 fprintf (vect_dump,
3271 "not vectorized: no vectype for stmt: ");
3272 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3273 fprintf (vect_dump, " scalar_type: ");
3274 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3277 if (bb_vinfo)
3279 /* Mark the statement as not vectorizable. */
3280 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3281 stop_bb_analysis = true;
3282 continue;
3285 if (gather)
3287 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3288 free_data_ref (dr);
3290 return false;
3293 /* Adjust the minimal vectorization factor according to the
3294 vector type. */
3295 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3296 if (vf > *min_vf)
3297 *min_vf = vf;
3299 if (gather)
3301 unsigned int j, k, n;
3302 struct data_reference *olddr
3303 = VEC_index (data_reference_p, datarefs, i);
3304 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3305 struct data_dependence_relation *ddr, *newddr;
3306 bool bad = false;
3307 tree off;
3308 VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3310 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3311 if (gather
3312 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3313 gather = false;
3314 if (!gather)
3316 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3318 fprintf (vect_dump,
3319 "not vectorized: not suitable for gather load ");
3320 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3322 return false;
3325 n = VEC_length (data_reference_p, datarefs) - 1;
3326 for (j = 0, k = i - 1; j < i; j++)
3328 ddr = VEC_index (ddr_p, ddrs, k);
3329 gcc_assert (DDR_B (ddr) == olddr);
3330 newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3331 nest);
3332 VEC_replace (ddr_p, ddrs, k, newddr);
3333 free_dependence_relation (ddr);
3334 if (!bad
3335 && DR_IS_WRITE (DDR_A (newddr))
3336 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3337 bad = true;
3338 k += --n;
3341 k++;
3342 n = k + VEC_length (data_reference_p, datarefs) - i - 1;
3343 for (; k < n; k++)
3345 ddr = VEC_index (ddr_p, ddrs, k);
3346 gcc_assert (DDR_A (ddr) == olddr);
3347 newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3348 nest);
3349 VEC_replace (ddr_p, ddrs, k, newddr);
3350 free_dependence_relation (ddr);
3351 if (!bad
3352 && DR_IS_WRITE (DDR_B (newddr))
3353 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3354 bad = true;
3357 k = VEC_length (ddr_p, ddrs)
3358 - VEC_length (data_reference_p, datarefs) + i;
3359 ddr = VEC_index (ddr_p, ddrs, k);
3360 gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3361 newddr = initialize_data_dependence_relation (dr, dr, nest);
3362 VEC_replace (ddr_p, ddrs, k, newddr);
3363 free_dependence_relation (ddr);
3364 VEC_replace (data_reference_p, datarefs, i, dr);
3366 if (bad)
3368 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3370 fprintf (vect_dump,
3371 "not vectorized: data dependence conflict"
3372 " prevents gather load");
3373 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3375 return false;
3378 STMT_VINFO_GATHER_P (stmt_info) = true;
3380 else if (loop_vinfo
3381 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3383 bool strided_load = false;
3384 if (!nested_in_vect_loop_p (loop, stmt))
3385 strided_load
3386 = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
3387 if (!strided_load)
3389 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3391 fprintf (vect_dump,
3392 "not vectorized: not suitable for strided load ");
3393 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3395 return false;
3397 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
3401 return true;
3405 /* Function vect_get_new_vect_var.
3407 Returns a name for a new variable. The current naming scheme prepends
3408 the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
3409 VAR_KIND) to NAME when NAME is provided; otherwise only the prefix
3410 is used. */
3412 tree
3413 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3415 const char *prefix;
3416 tree new_vect_var;
3418 switch (var_kind)
3420 case vect_simple_var:
3421 prefix = "vect_";
3422 break;
3423 case vect_scalar_var:
3424 prefix = "stmp_";
3425 break;
3426 case vect_pointer_var:
3427 prefix = "vect_p";
3428 break;
3429 default:
3430 gcc_unreachable ();
3433 if (name)
3435 char* tmp = concat (prefix, name, NULL);
3436 new_vect_var = create_tmp_reg (type, tmp);
3437 free (tmp);
3439 else
3440 new_vect_var = create_tmp_reg (type, prefix);
3442 return new_vect_var;
3446 /* Function vect_create_addr_base_for_vector_ref.
3448 Create an expression that computes the address of the first memory location
3449 that will be accessed for a data reference.
3451 Input:
3452 STMT: The statement containing the data reference.
3453 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3454 OFFSET: Optional. If supplied, it is added to the initial address.
3455 LOOP: Specify the loop-nest relative to which the address should be computed.
3456 For example, when the dataref is in an inner-loop nested in an
3457 outer-loop that is now being vectorized, LOOP can be either the
3458 outer-loop, or the inner-loop. The first memory location accessed
3459 by the following dataref ('in' points to short):
3461 for (i=0; i<N; i++)
3462 for (j=0; j<M; j++)
3463 s += in[i+j]
3465 is as follows:
3466 if LOOP=i_loop: &in (relative to i_loop)
3467 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3469 Output:
3470 1. Return an SSA_NAME whose value is the address of the memory location of
3471 the first vector of the data reference.
3472 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3473 these statement(s) which define the returned SSA_NAME.
3475 FORNOW: We are only handling array accesses with step 1. */
3477 tree
3478 vect_create_addr_base_for_vector_ref (gimple stmt,
3479 gimple_seq *new_stmt_list,
3480 tree offset,
3481 struct loop *loop)
3483 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3484 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3485 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3486 tree base_name;
3487 tree data_ref_base_var;
3488 tree vec_stmt;
3489 tree addr_base, addr_expr;
3490 tree dest;
3491 gimple_seq seq = NULL;
3492 tree base_offset = unshare_expr (DR_OFFSET (dr));
3493 tree init = unshare_expr (DR_INIT (dr));
3494 tree vect_ptr_type;
3495 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3496 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3497 tree base;
3499 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3501 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3503 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3505 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3506 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3507 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3510 if (loop_vinfo)
3511 base_name = build_fold_indirect_ref (data_ref_base);
3512 else
3514 base_offset = ssize_int (0);
3515 init = ssize_int (0);
3516 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
3519 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3520 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3521 data_ref_base_var);
3522 gimple_seq_add_seq (new_stmt_list, seq);
3524 /* Create base_offset */
3525 base_offset = size_binop (PLUS_EXPR,
3526 fold_convert (sizetype, base_offset),
3527 fold_convert (sizetype, init));
3528 dest = create_tmp_var (sizetype, "base_off");
3529 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3530 gimple_seq_add_seq (new_stmt_list, seq);
3532 if (offset)
3534 tree tmp = create_tmp_var (sizetype, "offset");
3536 offset = fold_build2 (MULT_EXPR, sizetype,
3537 fold_convert (sizetype, offset), step);
3538 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3539 base_offset, offset);
3540 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3541 gimple_seq_add_seq (new_stmt_list, seq);
3544 /* base + base_offset */
3545 if (loop_vinfo)
3546 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3547 else
3549 addr_base = build1 (ADDR_EXPR,
3550 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3551 unshare_expr (DR_REF (dr)));
3554 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3555 base = get_base_address (DR_REF (dr));
3556 if (base
3557 && TREE_CODE (base) == MEM_REF)
3558 vect_ptr_type
3559 = build_qualified_type (vect_ptr_type,
3560 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3562 vec_stmt = fold_convert (vect_ptr_type, addr_base);
3563 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3564 get_name (base_name));
3565 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3566 gimple_seq_add_seq (new_stmt_list, seq);
3568 if (DR_PTR_INFO (dr)
3569 && TREE_CODE (vec_stmt) == SSA_NAME)
3571 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3572 if (offset)
3573 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (vec_stmt));
3576 if (vect_print_dump_info (REPORT_DETAILS))
3578 fprintf (vect_dump, "created ");
3579 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
3582 return vec_stmt;
3586 /* Function vect_create_data_ref_ptr.
3588 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3589 location accessed in the loop by STMT, along with the def-use update
3590 chain to appropriately advance the pointer through the loop iterations.
3591 Also set aliasing information for the pointer. This pointer is used by
3592 the callers to this function to create a memory reference expression for
3593 vector load/store access.
3595 Input:
3596 1. STMT: a stmt that references memory. Expected to be of the form
3597 GIMPLE_ASSIGN <name, data-ref> or
3598 GIMPLE_ASSIGN <data-ref, name>.
3599 2. AGGR_TYPE: the type of the reference, which should be either a vector
3600 or an array.
3601 3. AT_LOOP: the loop where the vector memref is to be created.
3602 4. OFFSET (optional): an offset to be added to the initial address accessed
3603 by the data-ref in STMT.
3604 5. BSI: location where the new stmts are to be placed if there is no loop
3605 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3606 pointing to the initial address.
3608 Output:
3609 1. Declare a new ptr to vector_type, and have it point to the base of the
3610 data reference (the initial address accessed by the data reference).
3611 For example, for vector of type V8HI, the following code is generated:
3613 v8hi *ap;
3614 ap = (v8hi *)initial_address;
3616 if OFFSET is not supplied:
3617 initial_address = &a[init];
3618 if OFFSET is supplied:
3619 initial_address = &a[init + OFFSET];
3621 Return the initial_address in INITIAL_ADDRESS.
3623 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3624 update the pointer in each iteration of the loop.
3626 Return the increment stmt that updates the pointer in PTR_INCR.
3628 3. Set INV_P to true if the access pattern of the data reference in the
3629 vectorized loop is invariant. Set it to false otherwise.
3631 4. Return the pointer. */
3633 tree
3634 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3635 tree offset, tree *initial_address,
3636 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3637 bool only_init, bool *inv_p)
3639 tree base_name;
3640 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3641 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3642 struct loop *loop = NULL;
3643 bool nested_in_vect_loop = false;
3644 struct loop *containing_loop = NULL;
3645 tree aggr_ptr_type;
3646 tree aggr_ptr;
3647 tree new_temp;
3648 gimple vec_stmt;
3649 gimple_seq new_stmt_list = NULL;
3650 edge pe = NULL;
3651 basic_block new_bb;
3652 tree aggr_ptr_init;
3653 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3654 tree aptr;
3655 gimple_stmt_iterator incr_gsi;
3656 bool insert_after;
3657 bool negative;
3658 tree indx_before_incr, indx_after_incr;
3659 gimple incr;
3660 tree step;
3661 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3662 tree base;
3664 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3665 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3667 if (loop_vinfo)
3669 loop = LOOP_VINFO_LOOP (loop_vinfo);
3670 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3671 containing_loop = (gimple_bb (stmt))->loop_father;
3672 pe = loop_preheader_edge (loop);
3674 else
3676 gcc_assert (bb_vinfo);
3677 only_init = true;
3678 *ptr_incr = NULL;
3681 /* Check the step (evolution) of the load in LOOP, and record
3682 whether it's invariant. */
3683 if (nested_in_vect_loop)
3684 step = STMT_VINFO_DR_STEP (stmt_info);
3685 else
3686 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3688 if (tree_int_cst_compare (step, size_zero_node) == 0)
3689 *inv_p = true;
3690 else
3691 *inv_p = false;
3692 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3694 /* Create an expression for the first address accessed by this load
3695 in LOOP. */
3696 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3698 if (vect_print_dump_info (REPORT_DETAILS))
3700 tree data_ref_base = base_name;
3701 fprintf (vect_dump, "create %s-pointer variable to type: ",
3702 tree_code_name[(int) TREE_CODE (aggr_type)]);
3703 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3704 if (TREE_CODE (data_ref_base) == VAR_DECL
3705 || TREE_CODE (data_ref_base) == ARRAY_REF)
3706 fprintf (vect_dump, " vectorizing an array ref: ");
3707 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3708 fprintf (vect_dump, " vectorizing a record based array ref: ");
3709 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3710 fprintf (vect_dump, " vectorizing a pointer ref: ");
3711 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3714 /* (1) Create the new aggregate-pointer variable. */
3715 aggr_ptr_type = build_pointer_type (aggr_type);
3716 base = get_base_address (DR_REF (dr));
3717 if (base
3718 && TREE_CODE (base) == MEM_REF)
3719 aggr_ptr_type
3720 = build_qualified_type (aggr_ptr_type,
3721 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3722 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3723 get_name (base_name));
3725 /* Vector and array types inherit the alias set of their component
3726 type by default so we need to use a ref-all pointer if the data
3727 reference does not conflict with the created aggregated data
3728 reference because it is not addressable. */
3729 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3730 get_alias_set (DR_REF (dr))))
3732 aggr_ptr_type
3733 = build_pointer_type_for_mode (aggr_type,
3734 TYPE_MODE (aggr_ptr_type), true);
3735 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3736 get_name (base_name));
3739 /* Likewise for any of the data references in the stmt group. */
3740 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3742 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3745 tree lhs = gimple_assign_lhs (orig_stmt);
3746 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3747 get_alias_set (lhs)))
3749 aggr_ptr_type
3750 = build_pointer_type_for_mode (aggr_type,
3751 TYPE_MODE (aggr_ptr_type), true);
3752 aggr_ptr
3753 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3754 get_name (base_name));
3755 break;
3758 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3760 while (orig_stmt);
3763 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3764 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3765 def-use update cycles for the pointer: one relative to the outer-loop
3766 (LOOP), which is what steps (3) and (4) below do. The other is relative
3767 to the inner-loop (which is the inner-most loop containing the dataref),
3768 and this is done by step (5) below.
3770 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3771 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3772 redundant. Steps (3),(4) create the following:
3774 vp0 = &base_addr;
3775 LOOP: vp1 = phi(vp0,vp2)
3778 vp2 = vp1 + step
3779 goto LOOP
3781 If there is an inner-loop nested in loop, then step (5) will also be
3782 applied, and an additional update in the inner-loop will be created:
3784 vp0 = &base_addr;
3785 LOOP: vp1 = phi(vp0,vp2)
3787 inner: vp3 = phi(vp1,vp4)
3788 vp4 = vp3 + inner_step
3789 if () goto inner
3791 vp2 = vp1 + step
3792 if () goto LOOP */
3794 /* (2) Calculate the initial address of the aggregate-pointer, and set
3795 the aggregate-pointer to point to it before the loop. */
3797 /* Create: &(base[init_val+offset]) in the loop preheader. */
3799 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3800 offset, loop);
3801 if (new_stmt_list)
3803 if (pe)
3805 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3806 gcc_assert (!new_bb);
3808 else
3809 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3812 *initial_address = new_temp;
3814 /* Create: p = (aggr_type *) initial_base */
3815 if (TREE_CODE (new_temp) != SSA_NAME
3816 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3818 vec_stmt = gimple_build_assign (aggr_ptr,
3819 fold_convert (aggr_ptr_type, new_temp));
3820 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3821 /* Copy the points-to information if it exists. */
3822 if (DR_PTR_INFO (dr))
3823 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3824 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3825 if (pe)
3827 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3828 gcc_assert (!new_bb);
3830 else
3831 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3833 else
3834 aggr_ptr_init = new_temp;
3836 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3837 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3838 inner-loop nested in LOOP (during outer-loop vectorization). */
3840 /* No update in loop is required. */
3841 if (only_init && (!loop_vinfo || at_loop == loop))
3842 aptr = aggr_ptr_init;
3843 else
3845 /* The step of the aggregate pointer is the type size. */
3846 tree step = TYPE_SIZE_UNIT (aggr_type);
3847 /* One exception to the above is when the scalar step of the load in
3848 LOOP is zero. In this case the step here is also zero. */
3849 if (*inv_p)
3850 step = size_zero_node;
3851 else if (negative)
3852 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3854 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3856 create_iv (aggr_ptr_init,
3857 fold_convert (aggr_ptr_type, step),
3858 aggr_ptr, loop, &incr_gsi, insert_after,
3859 &indx_before_incr, &indx_after_incr);
3860 incr = gsi_stmt (incr_gsi);
3861 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3863 /* Copy the points-to information if it exists. */
3864 if (DR_PTR_INFO (dr))
3866 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3867 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3869 if (ptr_incr)
3870 *ptr_incr = incr;
3872 aptr = indx_before_incr;
3875 if (!nested_in_vect_loop || only_init)
3876 return aptr;
3879 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3880 nested in LOOP, if it exists. */
3882 gcc_assert (nested_in_vect_loop);
3883 if (!only_init)
3885 standard_iv_increment_position (containing_loop, &incr_gsi,
3886 &insert_after);
3887 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3888 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3889 &indx_after_incr);
3890 incr = gsi_stmt (incr_gsi);
3891 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3893 /* Copy the points-to information if it exists. */
3894 if (DR_PTR_INFO (dr))
3896 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3897 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3899 if (ptr_incr)
3900 *ptr_incr = incr;
3902 return indx_before_incr;
3904 else
3905 gcc_unreachable ();
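/* Illustrative sketch only (not part of the vectorizer): a plain-C picture of
   the pointer that vect_create_data_ref_ptr arranges for a simple unit-stride
   access, assuming a hypothetical 8 x short vector type built with the GCC
   vector extension.  The real code emits the initial address in the loop
   preheader and bumps the pointer by TYPE_SIZE_UNIT (aggr_type) per iteration
   via create_iv, as documented above; the names below are made up.  */
#if 0
typedef short v8hi __attribute__ ((vector_size (16)));

static void
sketch_copy (short *a, short *b, int n)
{
  v8hi *ap = (v8hi *) a;	/* steps (1)+(2): initial address, preheader  */
  v8hi *bp = (v8hi *) b;
  int i;

  for (i = 0; i < n / 8; i++)
    {
      *bp = *ap;		/* vector load/store through the new pointers  */
      ap++, bp++;		/* steps (3)/(4): bump by the aggregate size  */
    }
}
#endif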
3909 /* Function bump_vector_ptr
3911 Increment a pointer (to a vector type) by vector-size. If requested,
3912 i.e. if PTR_INCR is given, then also connect the new increment stmt
3913 to the existing def-use update-chain of the pointer, by modifying
3914 the PTR_INCR as illustrated below:
3916 The pointer def-use update-chain before this function:
3917 DATAREF_PTR = phi (p_0, p_2)
3918 ....
3919 PTR_INCR: p_2 = DATAREF_PTR + step
3921 The pointer def-use update-chain after this function:
3922 DATAREF_PTR = phi (p_0, p_2)
3923 ....
3924 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3925 ....
3926 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3928 Input:
3929 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3930 in the loop.
3931 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3932 the loop. The increment amount across iterations is expected
3933 to be vector_size.
3934 BSI - location where the new update stmt is to be placed.
3935 STMT - the original scalar memory-access stmt that is being vectorized.
3936 BUMP - optional. The offset by which to bump the pointer. If not given,
3937 the offset is assumed to be vector_size.
3939 Output: Return NEW_DATAREF_PTR as illustrated above.
3943 tree
3944 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3945 gimple stmt, tree bump)
3947 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3948 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3949 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3950 tree update = TYPE_SIZE_UNIT (vectype);
3951 gimple incr_stmt;
3952 ssa_op_iter iter;
3953 use_operand_p use_p;
3954 tree new_dataref_ptr;
3956 if (bump)
3957 update = bump;
3959 new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
3960 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
3961 dataref_ptr, update);
3962 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3964 /* Copy the points-to information if it exists. */
3965 if (DR_PTR_INFO (dr))
3967 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3968 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
3971 if (!ptr_incr)
3972 return new_dataref_ptr;
3974 /* Update the vector-pointer's cross-iteration increment. */
3975 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3977 tree use = USE_FROM_PTR (use_p);
3979 if (use == dataref_ptr)
3980 SET_USE (use_p, new_dataref_ptr);
3981 else
3982 gcc_assert (tree_int_cst_compare (use, update) == 0);
3985 return new_dataref_ptr;
3989 /* Function vect_create_destination_var.
3991 Create a new temporary of type VECTYPE. */
3993 tree
3994 vect_create_destination_var (tree scalar_dest, tree vectype)
3996 tree vec_dest;
3997 const char *new_name;
3998 tree type;
3999 enum vect_var_kind kind;
4001 kind = vectype ? vect_simple_var : vect_scalar_var;
4002 type = vectype ? vectype : TREE_TYPE (scalar_dest);
4004 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4006 new_name = get_name (scalar_dest);
4007 if (!new_name)
4008 new_name = "var_";
4009 vec_dest = vect_get_new_vect_var (type, kind, new_name);
4011 return vec_dest;
4014 /* Function vect_grouped_store_supported.
4016 Returns TRUE if interleave high and interleave low permutations
4017 are supported, and FALSE otherwise. */
4019 bool
4020 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
4022 enum machine_mode mode = TYPE_MODE (vectype);
4024 /* vect_permute_store_chain requires the group size to be a power of two. */
4025 if (exact_log2 (count) == -1)
4027 if (vect_print_dump_info (REPORT_DETAILS))
4028 fprintf (vect_dump, "the size of the group of accesses"
4029 " is not a power of 2");
4030 return false;
4033 /* Check that the permutation is supported. */
4034 if (VECTOR_MODE_P (mode))
4036 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4037 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4038 for (i = 0; i < nelt / 2; i++)
4040 sel[i * 2] = i;
4041 sel[i * 2 + 1] = i + nelt;
4043 if (can_vec_perm_p (mode, false, sel))
4045 for (i = 0; i < nelt; i++)
4046 sel[i] += nelt / 2;
4047 if (can_vec_perm_p (mode, false, sel))
4048 return true;
4052 if (vect_print_dump_info (REPORT_DETAILS))
4053 fprintf (vect_dump, "interleave op not supported by target.");
4054 return false;
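/* Illustrative sketch only: a standalone program that prints the two
   VEC_PERM_EXPR selectors checked above, for a hypothetical 8-element vector
   (nelt == 8).  Expected output:
     high: 0 8 1 9 2 10 3 11
     low:  4 12 5 13 6 14 7 15  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int i, nelt = 8;
  unsigned char sel[8];

  for (i = 0; i < nelt / 2; i++)
    {
      sel[i * 2] = i;
      sel[i * 2 + 1] = i + nelt;
    }
  printf ("high:");
  for (i = 0; i < nelt; i++)
    printf (" %d", sel[i]);
  printf ("\n");

  for (i = 0; i < nelt; i++)
    sel[i] += nelt / 2;
  printf ("low: ");
  for (i = 0; i < nelt; i++)
    printf (" %d", sel[i]);
  printf ("\n");
  return 0;
}
#endif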
4058 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
4059 type VECTYPE. */
4061 bool
4062 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4064 return vect_lanes_optab_supported_p ("vec_store_lanes",
4065 vec_store_lanes_optab,
4066 vectype, count);
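/* Illustrative sketch only: a rough scalar model of what a vec_store_lanes
   instruction does for COUNT vectors of NELT elements -- the in-register
   structure-of-arrays becomes an array-of-structures in memory
   (vec_load_lanes is the inverse).  The exact semantics are defined by the
   target's optab; the numbers and names here are just for illustration.  */
#if 0
#include <stdio.h>

#define NELT  4
#define COUNT 3

/* Element i of vector j ends up at mem[i * COUNT + j].  */
static void
store_lanes (int *mem, int vecs[COUNT][NELT])
{
  int i, j;
  for (i = 0; i < NELT; i++)
    for (j = 0; j < COUNT; j++)
      mem[i * COUNT + j] = vecs[j][i];
}

int
main (void)
{
  int vecs[COUNT][NELT] = { { 0, 3, 6, 9 }, { 1, 4, 7, 10 }, { 2, 5, 8, 11 } };
  int mem[COUNT * NELT];
  int i;

  store_lanes (mem, vecs);
  for (i = 0; i < COUNT * NELT; i++)
    printf (" %d", mem[i]);	/* prints 0 1 2 ... 11  */
  printf ("\n");
  return 0;
}
#endif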
4070 /* Function vect_permute_store_chain.
4072 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4073 a power of 2, generate interleave_high/low stmts to reorder the data
4074 correctly for the stores. Return the final references for stores in
4075 RESULT_CHAIN.
4077 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4078 The input is 4 vectors each containing 8 elements. We assign a number to
4079 each element; the input sequence is:
4081 1st vec: 0 1 2 3 4 5 6 7
4082 2nd vec: 8 9 10 11 12 13 14 15
4083 3rd vec: 16 17 18 19 20 21 22 23
4084 4th vec: 24 25 26 27 28 29 30 31
4086 The output sequence should be:
4088 1st vec: 0 8 16 24 1 9 17 25
4089 2nd vec: 2 10 18 26 3 11 19 27
4090 3rd vec: 4 12 20 28 5 13 21 29
4091 4th vec: 6 14 22 30 7 15 23 31
4093 i.e., we interleave the contents of the four vectors in their order.
4095 We use interleave_high/low instructions to create such output. The input of
4096 each interleave_high/low operation is two vectors:
4097 1st vec 2nd vec
4098 0 1 2 3 4 5 6 7
4099 the even elements of the result vector are obtained left-to-right from the
4100 high/low elements of the first vector. The odd elements of the result are
4101 obtained left-to-right from the high/low elements of the second vector.
4102 The output of interleave_high will be: 0 4 1 5
4103 and of interleave_low: 2 6 3 7
4106 The permutation is done in log2 (LENGTH) stages. In each stage interleave_high
4107 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4108 where the first argument is taken from the first half of DR_CHAIN and the
4109 second argument from its second half.
4110 In our example,
4112 I1: interleave_high (1st vec, 3rd vec)
4113 I2: interleave_low (1st vec, 3rd vec)
4114 I3: interleave_high (2nd vec, 4th vec)
4115 I4: interleave_low (2nd vec, 4th vec)
4117 The output for the first stage is:
4119 I1: 0 16 1 17 2 18 3 19
4120 I2: 4 20 5 21 6 22 7 23
4121 I3: 8 24 9 25 10 26 11 27
4122 I4: 12 28 13 29 14 30 15 31
4124 The output of the second stage, i.e. the final result is:
4126 I1: 0 8 16 24 1 9 17 25
4127 I2: 2 10 18 26 3 11 19 27
4128 I3: 4 12 20 28 5 13 21 29
4129 I4: 6 14 22 30 7 15 23 31. */
4131 void
4132 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
4133 unsigned int length,
4134 gimple stmt,
4135 gimple_stmt_iterator *gsi,
4136 VEC(tree,heap) **result_chain)
4138 tree vect1, vect2, high, low;
4139 gimple perm_stmt;
4140 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4141 tree perm_mask_low, perm_mask_high;
4142 unsigned int i, n;
4143 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
4144 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4146 *result_chain = VEC_copy (tree, heap, dr_chain);
4148 for (i = 0, n = nelt / 2; i < n; i++)
4150 sel[i * 2] = i;
4151 sel[i * 2 + 1] = i + nelt;
4153 perm_mask_high = vect_gen_perm_mask (vectype, sel);
4154 gcc_assert (perm_mask_high != NULL);
4156 for (i = 0; i < nelt; i++)
4157 sel[i] += nelt / 2;
4158 perm_mask_low = vect_gen_perm_mask (vectype, sel);
4159 gcc_assert (perm_mask_low != NULL);
4161 for (i = 0, n = exact_log2 (length); i < n; i++)
4163 for (j = 0; j < length/2; j++)
4165 vect1 = VEC_index (tree, dr_chain, j);
4166 vect2 = VEC_index (tree, dr_chain, j+length/2);
4168 /* Create interleaving stmt:
4169 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
4170 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
4171 perm_stmt
4172 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high,
4173 vect1, vect2, perm_mask_high);
4174 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4175 VEC_replace (tree, *result_chain, 2*j, high);
4177 /* Create interleaving stmt:
4178 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4179 nelt*3/2+1, ...}> */
4180 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
4181 perm_stmt
4182 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low,
4183 vect1, vect2, perm_mask_low);
4184 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4185 VEC_replace (tree, *result_chain, 2*j+1, low);
4187 dr_chain = VEC_copy (tree, heap, *result_chain);
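/* Illustrative sketch only: a standalone scalar model of the interleaving
   permutation described in the comment above, assuming LENGTH == 4 vectors of
   NELT == 8 elements.  Running it reproduces the documented final sequence
   (0 8 16 24 1 9 17 25 / 2 10 18 26 3 11 19 27 / 4 12 20 28 5 13 21 29 /
   6 14 22 30 7 15 23 31); the helper names are made up.  */
#if 0
#include <stdio.h>
#include <string.h>

#define NELT 8
#define LEN  4

/* RES = interleave of the high (hi != 0) or low halves of A and B, i.e.
   VEC_PERM_EXPR with the masks built in vect_permute_store_chain.  */
static void
interleave (int *res, const int *a, const int *b, int hi)
{
  int base = hi ? 0 : NELT / 2;
  int i;
  for (i = 0; i < NELT / 2; i++)
    {
      res[2 * i] = a[base + i];
      res[2 * i + 1] = b[base + i];
    }
}

int
main (void)
{
  int chain[LEN][NELT], out[LEN][NELT];
  int v, e, stage, j;

  /* Number the elements 0..31 as in the comment above.  */
  for (v = 0; v < LEN; v++)
    for (e = 0; e < NELT; e++)
      chain[v][e] = v * NELT + e;

  /* log2 (LEN) == 2 stages.  */
  for (stage = 0; stage < 2; stage++)
    {
      for (j = 0; j < LEN / 2; j++)
	{
	  interleave (out[2 * j], chain[j], chain[j + LEN / 2], 1);
	  interleave (out[2 * j + 1], chain[j], chain[j + LEN / 2], 0);
	}
      memcpy (chain, out, sizeof chain);
    }

  for (v = 0; v < LEN; v++)
    {
      for (e = 0; e < NELT; e++)
	printf (" %2d", chain[v][e]);
      printf ("\n");
    }
  return 0;
}
#endif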
4191 /* Function vect_setup_realignment
4193 This function is called when vectorizing an unaligned load using
4194 the dr_explicit_realign[_optimized] scheme.
4195 This function generates the following code at the loop prolog:
4197 p = initial_addr;
4198 x msq_init = *(floor(p)); # prolog load
4199 realignment_token = call target_builtin;
4200 loop:
4201 x msq = phi (msq_init, ---)
4203 The stmts marked with x are generated only for the case of
4204 dr_explicit_realign_optimized.
4206 The code above sets up a new (vector) pointer, pointing to the first
4207 location accessed by STMT, and a "floor-aligned" load using that pointer.
4208 It also generates code to compute the "realignment-token" (if the relevant
4209 target hook was defined), and creates a phi-node at the loop-header bb
4210 whose arguments are the result of the prolog-load (created by this
4211 function) and the result of a load that takes place in the loop (to be
4212 created by the caller to this function).
4214 For the case of dr_explicit_realign_optimized:
4215 The caller to this function uses the phi-result (msq) to create the
4216 realignment code inside the loop, and sets up the missing phi argument,
4217 as follows:
4218 loop:
4219 msq = phi (msq_init, lsq)
4220 lsq = *(floor(p')); # load in loop
4221 result = realign_load (msq, lsq, realignment_token);
4223 For the case of dr_explicit_realign:
4224 loop:
4225 msq = *(floor(p)); # load in loop
4226 p' = p + (VS-1);
4227 lsq = *(floor(p')); # load in loop
4228 result = realign_load (msq, lsq, realignment_token);
4230 Input:
4231 STMT - (scalar) load stmt to be vectorized. This load accesses
4232 a memory location that may be unaligned.
4233 BSI - place where new code is to be inserted.
4234 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4235 is used.
4237 Output:
4238 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4239 target hook, if defined.
4240 Return value - the result of the loop-header phi node. */
4242 tree
4243 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4244 tree *realignment_token,
4245 enum dr_alignment_support alignment_support_scheme,
4246 tree init_addr,
4247 struct loop **at_loop)
4249 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4250 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4251 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4252 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4253 struct loop *loop = NULL;
4254 edge pe = NULL;
4255 tree scalar_dest = gimple_assign_lhs (stmt);
4256 tree vec_dest;
4257 gimple inc;
4258 tree ptr;
4259 tree data_ref;
4260 gimple new_stmt;
4261 basic_block new_bb;
4262 tree msq_init = NULL_TREE;
4263 tree new_temp;
4264 gimple phi_stmt;
4265 tree msq = NULL_TREE;
4266 gimple_seq stmts = NULL;
4267 bool inv_p;
4268 bool compute_in_loop = false;
4269 bool nested_in_vect_loop = false;
4270 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4271 struct loop *loop_for_initial_load = NULL;
4273 if (loop_vinfo)
4275 loop = LOOP_VINFO_LOOP (loop_vinfo);
4276 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4279 gcc_assert (alignment_support_scheme == dr_explicit_realign
4280 || alignment_support_scheme == dr_explicit_realign_optimized);
4282 /* We need to generate three things:
4283 1. the misalignment computation
4284 2. the extra vector load (for the optimized realignment scheme).
4285 3. the phi node for the two vectors from which the realignment is
4286 done (for the optimized realignment scheme). */
4288 /* 1. Determine where to generate the misalignment computation.
4290 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4291 calculation will be generated by this function, outside the loop (in the
4292 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4293 caller, inside the loop.
4295 Background: If the misalignment remains fixed throughout the iterations of
4296 the loop, then both realignment schemes are applicable, and also the
4297 misalignment computation can be done outside LOOP. This is because we are
4298 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4299 are a multiple of VS (the Vector Size), and therefore the misalignment in
4300 different vectorized LOOP iterations is always the same.
4301 The problem arises only if the memory access is in an inner-loop nested
4302 inside LOOP, which is now being vectorized using outer-loop vectorization.
4303 This is the only case when the misalignment of the memory access may not
4304 remain fixed throughout the iterations of the inner-loop (as explained in
4305 detail in vect_supportable_dr_alignment). In this case, not only is the
4306 optimized realignment scheme not applicable, but also the misalignment
4307 computation (and generation of the realignment token that is passed to
4308 REALIGN_LOAD) has to be done inside the loop.
4310 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4311 or not, which in turn determines if the misalignment is computed inside
4312 the inner-loop, or outside LOOP. */
4314 if (init_addr != NULL_TREE || !loop_vinfo)
4316 compute_in_loop = true;
4317 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4321 /* 2. Determine where to generate the extra vector load.
4323 For the optimized realignment scheme, instead of generating two vector
4324 loads in each iteration, we generate a single extra vector load in the
4325 preheader of the loop, and in each iteration reuse the result of the
4326 vector load from the previous iteration. In case the memory access is in
4327 an inner-loop nested inside LOOP, which is now being vectorized using
4328 outer-loop vectorization, we need to determine whether this initial vector
4329 load should be generated at the preheader of the inner-loop, or can be
4330 generated at the preheader of LOOP. If the memory access has no evolution
4331 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4332 to be generated inside LOOP (in the preheader of the inner-loop). */
4334 if (nested_in_vect_loop)
4336 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4337 bool invariant_in_outerloop =
4338 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4339 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4341 else
4342 loop_for_initial_load = loop;
4343 if (at_loop)
4344 *at_loop = loop_for_initial_load;
4346 if (loop_for_initial_load)
4347 pe = loop_preheader_edge (loop_for_initial_load);
4349 /* 3. For the case of the optimized realignment, create the first vector
4350 load at the loop preheader. */
4352 if (alignment_support_scheme == dr_explicit_realign_optimized)
4354 /* Create msq_init = *(floor(p1)) in the loop preheader */
4356 gcc_assert (!compute_in_loop);
4357 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4358 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4359 NULL_TREE, &init_addr, NULL, &inc,
4360 true, &inv_p);
4361 new_temp = copy_ssa_name (ptr, NULL);
4362 new_stmt = gimple_build_assign_with_ops
4363 (BIT_AND_EXPR, new_temp, ptr,
4364 build_int_cst (TREE_TYPE (ptr),
4365 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4366 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4367 gcc_assert (!new_bb);
4368 data_ref
4369 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4370 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4371 new_stmt = gimple_build_assign (vec_dest, data_ref);
4372 new_temp = make_ssa_name (vec_dest, new_stmt);
4373 gimple_assign_set_lhs (new_stmt, new_temp);
4374 if (pe)
4376 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4377 gcc_assert (!new_bb);
4379 else
4380 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4382 msq_init = gimple_assign_lhs (new_stmt);
4385 /* 4. Create realignment token using a target builtin, if available.
4386 It is done either inside the containing loop, or before LOOP (as
4387 determined above). */
4389 if (targetm.vectorize.builtin_mask_for_load)
4391 tree builtin_decl;
4393 /* Compute INIT_ADDR - the initial address accessed by this memref. */
4394 if (!init_addr)
4396 /* Generate the INIT_ADDR computation outside LOOP. */
4397 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4398 NULL_TREE, loop);
4399 if (loop)
4401 pe = loop_preheader_edge (loop);
4402 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4403 gcc_assert (!new_bb);
4405 else
4406 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4409 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4410 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4411 vec_dest =
4412 vect_create_destination_var (scalar_dest,
4413 gimple_call_return_type (new_stmt));
4414 new_temp = make_ssa_name (vec_dest, new_stmt);
4415 gimple_call_set_lhs (new_stmt, new_temp);
4417 if (compute_in_loop)
4418 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4419 else
4421 /* Generate the misalignment computation outside LOOP. */
4422 pe = loop_preheader_edge (loop);
4423 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4424 gcc_assert (!new_bb);
4427 *realignment_token = gimple_call_lhs (new_stmt);
4429 /* The result of the CALL_EXPR to this builtin is determined from
4430 the value of the parameter and no global variables are touched,
4431 which makes the builtin a "const" function. Requiring the
4432 builtin to have the "const" attribute makes it unnecessary
4433 to call mark_call_clobbered. */
4434 gcc_assert (TREE_READONLY (builtin_decl));
4437 if (alignment_support_scheme == dr_explicit_realign)
4438 return msq;
4440 gcc_assert (!compute_in_loop);
4441 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4444 /* 5. Create msq = phi <msq_init, lsq> in loop */
4446 pe = loop_preheader_edge (containing_loop);
4447 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4448 msq = make_ssa_name (vec_dest, NULL);
4449 phi_stmt = create_phi_node (msq, containing_loop->header);
4450 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4452 return msq;
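/* Illustrative sketch only: a scalar model of the dr_explicit_realign idea.
   Two "floor-aligned" loads (msq at floor (p), lsq at floor (p + VS - 1)) are
   combined, shifted by the misalignment, to produce the value of an unaligned
   load at p.  The element-sized shift below stands in for the target-specific
   realignment token and REALIGN_LOAD semantics, which really operate on bytes
   and a target-built mask; all names here are hypothetical.  */
#if 0
#include <stdio.h>

#define VS 4			/* elements per vector  */

/* Model of result = REALIGN_LOAD <msq, lsq, token>.  */
static void
realign_load (int *result, const int *msq, const int *lsq, int token)
{
  int i;
  for (i = 0; i < VS; i++)
    result[i] = (token + i < VS) ? msq[token + i] : lsq[token + i - VS];
}

int
main (void)
{
  int a[12], result[VS], i;
  int misalign = 3;		/* p = &a[3], not vector aligned  */
  const int *msq, *lsq;

  for (i = 0; i < 12; i++)
    a[i] = i;

  msq = &a[0];			/* floor (p)  */
  lsq = &a[VS];			/* floor (p + VS - 1)  */

  realign_load (result, msq, lsq, misalign);
  for (i = 0; i < VS; i++)
    printf (" %d", result[i]);	/* prints: 3 4 5 6  */
  printf ("\n");
  return 0;
}
#endif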
4456 /* Function vect_grouped_load_supported.
4458 Returns TRUE if even and odd permutations are supported,
4459 and FALSE otherwise. */
4461 bool
4462 vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4464 enum machine_mode mode = TYPE_MODE (vectype);
4466 /* vect_permute_load_chain requires the group size to be a power of two. */
4467 if (exact_log2 (count) == -1)
4469 if (vect_print_dump_info (REPORT_DETAILS))
4470 fprintf (vect_dump, "the size of the group of accesses"
4471 " is not a power of 2");
4472 return false;
4475 /* Check that the permutation is supported. */
4476 if (VECTOR_MODE_P (mode))
4478 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4479 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4481 for (i = 0; i < nelt; i++)
4482 sel[i] = i * 2;
4483 if (can_vec_perm_p (mode, false, sel))
4485 for (i = 0; i < nelt; i++)
4486 sel[i] = i * 2 + 1;
4487 if (can_vec_perm_p (mode, false, sel))
4488 return true;
4492 if (vect_print_dump_info (REPORT_DETAILS))
4493 fprintf (vect_dump, "extract even/odd not supported by target");
4494 return false;
4497 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4498 type VECTYPE. */
4500 bool
4501 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4503 return vect_lanes_optab_supported_p ("vec_load_lanes",
4504 vec_load_lanes_optab,
4505 vectype, count);
4508 /* Function vect_permute_load_chain.
4510 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4511 a power of 2, generate extract_even/odd stmts to reorder the input data
4512 correctly. Return the final references for loads in RESULT_CHAIN.
4514 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4515 The input is 4 vectors each containing 8 elements. We assign a number to each
4516 element; the input sequence is:
4518 1st vec: 0 1 2 3 4 5 6 7
4519 2nd vec: 8 9 10 11 12 13 14 15
4520 3rd vec: 16 17 18 19 20 21 22 23
4521 4th vec: 24 25 26 27 28 29 30 31
4523 The output sequence should be:
4525 1st vec: 0 4 8 12 16 20 24 28
4526 2nd vec: 1 5 9 13 17 21 25 29
4527 3rd vec: 2 6 10 14 18 22 26 30
4528 4th vec: 3 7 11 15 19 23 27 31
4530 i.e., the first output vector should contain the first elements of each
4531 interleaving group, etc.
4533 We use extract_even/odd instructions to create such output. The input of
4534 each extract_even/odd operation is two vectors
4535 1st vec 2nd vec
4536 0 1 2 3 4 5 6 7
4538 and the output is the vector of extracted even/odd elements. The output of
4539 extract_even will be: 0 2 4 6
4540 and of extract_odd: 1 3 5 7
4543 The permutation is done in log2 (LENGTH) stages. In each stage extract_even
4544 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4545 their order. In our example,
4547 E1: extract_even (1st vec, 2nd vec)
4548 E2: extract_odd (1st vec, 2nd vec)
4549 E3: extract_even (3rd vec, 4th vec)
4550 E4: extract_odd (3rd vec, 4th vec)
4552 The output for the first stage will be:
4554 E1: 0 2 4 6 8 10 12 14
4555 E2: 1 3 5 7 9 11 13 15
4556 E3: 16 18 20 22 24 26 28 30
4557 E4: 17 19 21 23 25 27 29 31
4559 In order to proceed and create the correct sequence for the next stage (or
4560 for the correct output, if the second stage is the last one, as in our
4561 example), we first put the output of extract_even operation and then the
4562 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4563 The input for the second stage is:
4565 1st vec (E1): 0 2 4 6 8 10 12 14
4566 2nd vec (E3): 16 18 20 22 24 26 28 30
4567 3rd vec (E2): 1 3 5 7 9 11 13 15
4568 4th vec (E4): 17 19 21 23 25 27 29 31
4570 The output of the second stage:
4572 E1: 0 4 8 12 16 20 24 28
4573 E2: 2 6 10 14 18 22 26 30
4574 E3: 1 5 9 13 17 21 25 29
4575 E4: 3 7 11 15 19 23 27 31
4577 And RESULT_CHAIN after reordering:
4579 1st vec (E1): 0 4 8 12 16 20 24 28
4580 2nd vec (E3): 1 5 9 13 17 21 25 29
4581 3rd vec (E2): 2 6 10 14 18 22 26 30
4582 4th vec (E4): 3 7 11 15 19 23 27 31. */
4584 static void
4585 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4586 unsigned int length,
4587 gimple stmt,
4588 gimple_stmt_iterator *gsi,
4589 VEC(tree,heap) **result_chain)
4591 tree data_ref, first_vect, second_vect;
4592 tree perm_mask_even, perm_mask_odd;
4593 gimple perm_stmt;
4594 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4595 unsigned int i, j, log_length = exact_log2 (length);
4596 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4597 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4599 *result_chain = VEC_copy (tree, heap, dr_chain);
4601 for (i = 0; i < nelt; ++i)
4602 sel[i] = i * 2;
4603 perm_mask_even = vect_gen_perm_mask (vectype, sel);
4604 gcc_assert (perm_mask_even != NULL);
4606 for (i = 0; i < nelt; ++i)
4607 sel[i] = i * 2 + 1;
4608 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4609 gcc_assert (perm_mask_odd != NULL);
4611 for (i = 0; i < log_length; i++)
4613 for (j = 0; j < length; j += 2)
4615 first_vect = VEC_index (tree, dr_chain, j);
4616 second_vect = VEC_index (tree, dr_chain, j+1);
4618 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4619 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
4620 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4621 first_vect, second_vect,
4622 perm_mask_even);
4623 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4624 VEC_replace (tree, *result_chain, j/2, data_ref);
4626 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4627 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
4628 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4629 first_vect, second_vect,
4630 perm_mask_odd);
4631 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4632 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4634 dr_chain = VEC_copy (tree, heap, *result_chain);
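/* Illustrative sketch only: a standalone scalar model of the extract-even/odd
   permutation described in the comment above, for LENGTH == 4 vectors of
   NELT == 8 elements.  Running it reproduces the documented RESULT_CHAIN
   order (0 4 8 12 16 20 24 28 / 1 5 9 13 17 21 25 29 / 2 6 10 14 18 22 26 30 /
   3 7 11 15 19 23 27 31); the helper names are made up.  */
#if 0
#include <stdio.h>
#include <string.h>

#define NELT 8
#define LEN  4

/* RES = the even (odd == 0) or odd (odd == 1) elements of the concatenation
   of A and B, i.e. VEC_PERM_EXPR with the masks built in
   vect_permute_load_chain.  */
static void
extract (int *res, const int *a, const int *b, int odd)
{
  int i;
  for (i = 0; i < NELT / 2; i++)
    {
      res[i] = a[2 * i + odd];
      res[i + NELT / 2] = b[2 * i + odd];
    }
}

int
main (void)
{
  int chain[LEN][NELT], out[LEN][NELT];
  int v, e, stage, j;

  for (v = 0; v < LEN; v++)
    for (e = 0; e < NELT; e++)
      chain[v][e] = v * NELT + e;

  /* log2 (LEN) == 2 stages.  */
  for (stage = 0; stage < 2; stage++)
    {
      for (j = 0; j < LEN; j += 2)
	{
	  extract (out[j / 2], chain[j], chain[j + 1], 0);
	  extract (out[j / 2 + LEN / 2], chain[j], chain[j + 1], 1);
	}
      memcpy (chain, out, sizeof chain);
    }

  for (v = 0; v < LEN; v++)
    {
      for (e = 0; e < NELT; e++)
	printf (" %2d", chain[v][e]);
      printf ("\n");
    }
  return 0;
}
#endif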
4639 /* Function vect_transform_grouped_load.
4641 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4642 to perform their permutation and ascribe the result vectorized statements to
4643 the scalar statements.
4646 void
4647 vect_transform_grouped_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4648 gimple_stmt_iterator *gsi)
4650 VEC(tree,heap) *result_chain = NULL;
4652 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4653 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4654 vectors, that are ready for vector computation. */
4655 result_chain = VEC_alloc (tree, heap, size);
4656 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4657 vect_record_grouped_load_vectors (stmt, result_chain);
4658 VEC_free (tree, heap, result_chain);
4661 /* RESULT_CHAIN contains the output of the group of interleaved loads that were
4662 generated as part of the vectorization of STMT. Assign the statement
4663 for each vector to the associated scalar statement. */
4665 void
4666 vect_record_grouped_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4668 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4669 gimple next_stmt, new_stmt;
4670 unsigned int i, gap_count;
4671 tree tmp_data_ref;
4673 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4674 Since we scan the chain starting from its first node, their order
4675 corresponds to the order of data-refs in RESULT_CHAIN. */
4676 next_stmt = first_stmt;
4677 gap_count = 1;
4678 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4680 if (!next_stmt)
4681 break;
4683 /* Skip the gaps. Loads created for the gaps will be removed by the dead
4684 code elimination pass later. No need to check for the first stmt in
4685 the group, since it always exists.
4686 GROUP_GAP is the number of steps in elements from the previous
4687 access (if there is no gap GROUP_GAP is 1). We skip loads that
4688 correspond to the gaps. */
4689 if (next_stmt != first_stmt
4690 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4692 gap_count++;
4693 continue;
4696 while (next_stmt)
4698 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4699 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4700 copies, and we put the new vector statement in the first available
4701 RELATED_STMT. */
4702 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4703 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4704 else
4706 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4708 gimple prev_stmt =
4709 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4710 gimple rel_stmt =
4711 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4712 while (rel_stmt)
4714 prev_stmt = rel_stmt;
4715 rel_stmt =
4716 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4719 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4720 new_stmt;
4724 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4725 gap_count = 1;
4726 /* If NEXT_STMT accesses the same DR as the previous statement,
4727 put the same TMP_DATA_REF as its vectorized statement; otherwise
4728 get the next data-ref from RESULT_CHAIN. */
4729 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4730 break;
4735 /* Function vect_can_force_dr_alignment_p.
4737 Returns whether the alignment of a DECL can be forced to be aligned
4738 on ALIGNMENT bit boundary. */
4740 bool
4741 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4743 if (TREE_CODE (decl) != VAR_DECL)
4744 return false;
4746 /* We cannot change alignment of common or external symbols as another
4747 translation unit may contain a definition with lower alignment.
4748 The rules of common symbol linking mean that the definition
4749 will override the common symbol. */
4750 if (DECL_EXTERNAL (decl)
4751 || DECL_COMMON (decl))
4752 return false;
4754 if (TREE_ASM_WRITTEN (decl))
4755 return false;
4757 /* Do not override the alignment as specified by the ABI when the used
4758 attribute is set. */
4759 if (DECL_PRESERVE_P (decl))
4760 return false;
4762 if (TREE_STATIC (decl))
4763 return (alignment <= MAX_OFILE_ALIGNMENT);
4764 else
4765 return (alignment <= MAX_STACK_ALIGNMENT);
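/* Illustrative sketch only: what "forcing" the alignment of a DECL amounts to
   at the source level.  When the predicate above returns true, the vectorizer
   may raise DECL_ALIGN, which has the same effect as the (hypothetical)
   user-written attribute below; external declarations are the kind of DECL
   whose alignment cannot be forced.  */
#if 0
static short a[256] __attribute__ ((aligned (16)));	/* can be forced  */
extern short b[256];					/* cannot: external  */
#endif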
4769 /* Return whether the data reference DR is supported with respect to its
4770 alignment.
4771 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4772 if it is aligned, i.e., check if it is possible to vectorize it with different
4773 alignment. */
4775 enum dr_alignment_support
4776 vect_supportable_dr_alignment (struct data_reference *dr,
4777 bool check_aligned_accesses)
4779 gimple stmt = DR_STMT (dr);
4780 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4781 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4782 enum machine_mode mode = TYPE_MODE (vectype);
4783 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4784 struct loop *vect_loop = NULL;
4785 bool nested_in_vect_loop = false;
4787 if (aligned_access_p (dr) && !check_aligned_accesses)
4788 return dr_aligned;
4790 if (loop_vinfo)
4792 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4793 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4796 /* Possibly unaligned access. */
4798 /* We can choose between using the implicit realignment scheme (generating
4799 a misaligned_move stmt) and the explicit realignment scheme (generating
4800 aligned loads with a REALIGN_LOAD). There are two variants to the
4801 explicit realignment scheme: optimized, and unoptimized.
4802 We can optimize the realignment only if the step between consecutive
4803 vector loads is equal to the vector size. Since the vector memory
4804 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4805 is guaranteed that the misalignment amount remains the same throughout the
4806 execution of the vectorized loop. Therefore, we can create the
4807 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4808 at the loop preheader.
4810 However, in the case of outer-loop vectorization, when vectorizing a
4811 memory access in the inner-loop nested within the LOOP that is now being
4812 vectorized, while it is guaranteed that the misalignment of the
4813 vectorized memory access will remain the same in different outer-loop
4814 iterations, it is *not* guaranteed that it will remain the same throughout
4815 the execution of the inner-loop. This is because the inner-loop advances
4816 with the original scalar step (and not in steps of VS). If the inner-loop
4817 step happens to be a multiple of VS, then the misalignment remains fixed
4818 and we can use the optimized realignment scheme. For example:
4820 for (i=0; i<N; i++)
4821 for (j=0; j<M; j++)
4822 s += a[i+j];
4824 When vectorizing the i-loop in the above example, the step between
4825 consecutive vector loads is 1, and so the misalignment does not remain
4826 fixed across the execution of the inner-loop, and the realignment cannot
4827 be optimized (as illustrated in the following pseudo vectorized loop):
4829 for (i=0; i<N; i+=4)
4830 for (j=0; j<M; j++){
4831 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4832 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4833 // (assuming that we start from an aligned address).
4836 We therefore have to use the unoptimized realignment scheme:
4838 for (i=0; i<N; i+=4)
4839 for (j=k; j<M; j+=4)
4840 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4841 // that the misalignment of the initial address is
4842 // 0).
4844 The loop can then be vectorized as follows:
4846 for (k=0; k<4; k++){
4847 rt = get_realignment_token (&vp[k]);
4848 for (i=0; i<N; i+=4){
4849 v1 = vp[i+k];
4850 for (j=k; j<M; j+=4){
4851 v2 = vp[i+j+VS-1];
4852 va = REALIGN_LOAD <v1,v2,rt>;
4853 vs += va;
4854 v1 = v2;
4857 } */
4859 if (DR_IS_READ (dr))
4861 bool is_packed = false;
4862 tree type = (TREE_TYPE (DR_REF (dr)));
4864 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4865 && (!targetm.vectorize.builtin_mask_for_load
4866 || targetm.vectorize.builtin_mask_for_load ()))
4868 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4869 if ((nested_in_vect_loop
4870 && (TREE_INT_CST_LOW (DR_STEP (dr))
4871 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4872 || !loop_vinfo)
4873 return dr_explicit_realign;
4874 else
4875 return dr_explicit_realign_optimized;
4877 if (!known_alignment_for_access_p (dr))
4878 is_packed = not_size_aligned (DR_REF (dr));
4880 if (targetm.vectorize.
4881 support_vector_misalignment (mode, type,
4882 DR_MISALIGNMENT (dr), is_packed))
4883 /* Can't software pipeline the loads, but can at least do them. */
4884 return dr_unaligned_supported;
4886 else
4888 bool is_packed = false;
4889 tree type = (TREE_TYPE (DR_REF (dr)));
4891 if (!known_alignment_for_access_p (dr))
4892 is_packed = not_size_aligned (DR_REF (dr));
4894 if (targetm.vectorize.
4895 support_vector_misalignment (mode, type,
4896 DR_MISALIGNMENT (dr), is_packed))
4897 return dr_unaligned_supported;
4900 /* Unsupported. */
4901 return dr_unaligned_unsupported;