1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "dumpfile.h"
35 #include "cfgloop.h"
36 #include "tree-chrec.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-vectorizer.h"
39 #include "diagnostic-core.h"
41 /* Need to include rtl.h, expr.h, etc. for optabs. */
42 #include "expr.h"
43 #include "optabs.h"
45 /* Return true if load- or store-lanes optab OPTAB is implemented for
46 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
48 static bool
49 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
50 tree vectype, unsigned HOST_WIDE_INT count)
52 enum machine_mode mode, array_mode;
53 bool limit_p;
55 mode = TYPE_MODE (vectype);
56 limit_p = !targetm.array_mode_supported_p (mode, count);
57 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
58 MODE_INT, limit_p);
60 if (array_mode == BLKmode)
62 if (dump_enabled_p ())
63 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
64 "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65 GET_MODE_NAME (mode), count);
66 return false;
69 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
71 if (dump_enabled_p ())
72 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
73 "cannot use %s<%s><%s>", name,
74 GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
75 return false;
78 if (dump_enabled_p ())
79 dump_printf_loc (MSG_NOTE, vect_location,
80 "can use %s<%s><%s>", name, GET_MODE_NAME (array_mode),
81 GET_MODE_NAME (mode));
83 return true;
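/* Worked example (hypothetical target, illustration only): if VECTYPE has
   mode V4SI (128 bits) and COUNT is 3, the code above asks mode_for_size
   for an integer mode of 3 * 128 = 384 bits.  If no such mode is available
   (array_mode_supported_p is false and no integer mode of that size
   exists), BLKmode is returned and the load/store-lanes optab cannot be
   used for this group.  */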
87 /* Return the smallest scalar part of STMT.
88 This is used to determine the vectype of the stmt. We generally set the
89 vectype according to the type of the result (lhs). For stmts whose
90 result-type is different than the type of the arguments (e.g., demotion,
91 promotion), vectype will be reset appropriately (later). Note that we have
92 to visit the smallest datatype in this function, because that determines the
93 VF. If the smallest datatype in the loop is present only as the rhs of a
94 promotion operation - we'd miss it.
95 Such a case, where a variable of this datatype does not appear in the lhs
96 anywhere in the loop, can only occur if it's an invariant: e.g.:
97 'int_x = (int) short_inv', which we'd expect to have been optimized away by
98 invariant motion. However, we cannot rely on invariant motion to always
99 take invariants out of the loop, and so in the case of promotion we also
100 have to check the rhs.
101 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
102 types. */
104 tree
105 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
106 HOST_WIDE_INT *rhs_size_unit)
108 tree scalar_type = gimple_expr_type (stmt);
109 HOST_WIDE_INT lhs, rhs;
111 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
113 if (is_gimple_assign (stmt)
114 && (gimple_assign_cast_p (stmt)
115 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
116 || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
117 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
119 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
121 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
122 if (rhs < lhs)
123 scalar_type = rhs_type;
126 *lhs_size_unit = lhs;
127 *rhs_size_unit = rhs;
128 return scalar_type;
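/* Worked example (illustration only): for the cast statement
   int_x = (int) short_inv mentioned above, the lhs type is 'int'
   (TYPE_SIZE_UNIT == 4) and the rhs type is 'short' (TYPE_SIZE_UNIT == 2),
   so *LHS_SIZE_UNIT is set to 4, *RHS_SIZE_UNIT to 2, and the 'short'
   type is returned as the smallest scalar type of the statement.  */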
132 /* Find the place of the data-ref in STMT in the interleaving chain that starts
133 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
136 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
138 gimple next_stmt = first_stmt;
139 int result = 0;
141 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
142 return -1;
144 while (next_stmt && next_stmt != stmt)
146 result++;
147 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
150 if (next_stmt)
151 return result;
152 else
153 return -1;
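/* Illustration (hypothetical chain): if FIRST_STMT heads the interleaving
   chain of the stores to a[4*i], a[4*i+1], a[4*i+2] and a[4*i+3], then the
   statement storing to a[4*i+2] is at place 2 in the chain, while a
   statement that is not linked into the chain yields -1.  */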
157 /* Function vect_insert_into_interleaving_chain.
159 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
161 static void
162 vect_insert_into_interleaving_chain (struct data_reference *dra,
163 struct data_reference *drb)
165 gimple prev, next;
166 tree next_init;
167 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
168 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
170 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
171 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
172 while (next)
174 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
175 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
177 /* Insert here. */
178 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
179 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
180 return;
182 prev = next;
183 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
186 /* We got to the end of the list. Insert here. */
187 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
188 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
192 /* Function vect_update_interleaving_chain.
 194    For two data-refs DRA and DRB that are part of a chain of interleaved data
 195    accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.
197 There are four possible cases:
198 1. New stmts - both DRA and DRB are not a part of any chain:
199 FIRST_DR = DRB
200 NEXT_DR (DRB) = DRA
201 2. DRB is a part of a chain and DRA is not:
202 no need to update FIRST_DR
203 no need to insert DRB
204 insert DRA according to init
205 3. DRA is a part of a chain and DRB is not:
206 if (init of FIRST_DR > init of DRB)
207 FIRST_DR = DRB
208 NEXT(FIRST_DR) = previous FIRST_DR
209 else
210 insert DRB according to its init
211 4. both DRA and DRB are in some interleaving chains:
212 choose the chain with the smallest init of FIRST_DR
213 insert the nodes of the second chain into the first one. */
215 static void
216 vect_update_interleaving_chain (struct data_reference *drb,
217 struct data_reference *dra)
219 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
220 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
221 tree next_init, init_dra_chain, init_drb_chain;
222 gimple first_a, first_b;
223 tree node_init;
224 gimple node, prev, next, first_stmt;
226 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
227 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
229 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
230 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
231 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
232 return;
235 /* 2. DRB is a part of a chain and DRA is not. */
236 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
238 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
239 /* Insert DRA into the chain of DRB. */
240 vect_insert_into_interleaving_chain (dra, drb);
241 return;
244 /* 3. DRA is a part of a chain and DRB is not. */
245 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
247 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
248 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
249 old_first_stmt)));
250 gimple tmp;
252 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
254 /* DRB's init is smaller than the init of the stmt previously marked
255 as the first stmt of the interleaving chain of DRA. Therefore, we
256 update FIRST_STMT and put DRB in the head of the list. */
257 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
258 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
260 /* Update all the stmts in the list to point to the new FIRST_STMT. */
261 tmp = old_first_stmt;
262 while (tmp)
264 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
265 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
268 else
270 /* Insert DRB in the list of DRA. */
271 vect_insert_into_interleaving_chain (drb, dra);
272 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
274 return;
277 /* 4. both DRA and DRB are in some interleaving chains. */
278 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
279 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
280 if (first_a == first_b)
281 return;
282 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
283 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
285 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
287 /* Insert the nodes of DRA chain into the DRB chain.
 288         After inserting a node, continue from this node of the DRB chain (don't
 289         start from the beginning).  */
290 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
291 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
292 first_stmt = first_b;
294 else
296 /* Insert the nodes of DRB chain into the DRA chain.
 297         After inserting a node, continue from this node of the DRA chain (don't
 298         start from the beginning).  */
299 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
300 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
301 first_stmt = first_a;
304 while (node)
306 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
307 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
308 while (next)
310 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
311 if (tree_int_cst_compare (next_init, node_init) > 0)
313 /* Insert here. */
314 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
315 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
316 prev = node;
317 break;
319 prev = next;
320 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
322 if (!next)
324 /* We got to the end of the list. Insert here. */
325 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
326 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
327 prev = node;
329 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
330 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
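/* Illustration of case 3 above (hypothetical inits): if DRA already belongs
   to a chain whose current first element has init 8 and DRB, which is in no
   chain, has init 0, then 8 > 0, so DRB becomes the new first element, the
   old first element is linked after it, and every statement in the chain is
   updated to use DRB's statement as its GROUP_FIRST_ELEMENT.  */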
334 /* Check dependence between DRA and DRB for basic block vectorization.
 335    If the accesses share the same bases and offsets, we can compare their initial
336 constant offsets to decide whether they differ or not. In case of a read-
337 write dependence we check that the load is before the store to ensure that
338 vectorization will not change the order of the accesses. */
340 static bool
341 vect_drs_dependent_in_basic_block (struct data_reference *dra,
342 struct data_reference *drb)
344 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
345 gimple earlier_stmt;
347 /* We only call this function for pairs of loads and stores, but we verify
348 it here. */
349 if (DR_IS_READ (dra) == DR_IS_READ (drb))
351 if (DR_IS_READ (dra))
352 return false;
353 else
354 return true;
 357   /* Check that the data-refs have the same bases and offsets.  If not, we can't
358 determine if they are dependent. */
359 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
360 || !dr_equal_offsets_p (dra, drb))
361 return true;
363 /* Check the types. */
364 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
365 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
367 if (type_size_a != type_size_b
368 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
369 TREE_TYPE (DR_REF (drb))))
370 return true;
372 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
373 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
375 /* Two different locations - no dependence. */
376 if (init_a != init_b)
377 return false;
379 /* We have a read-write dependence. Check that the load is before the store.
380 When we vectorize basic blocks, vector load can be only before
381 corresponding scalar load, and vector store can be only after its
 382      corresponding scalar store.  So the order of the accesses is preserved in
383 case the load is before the store. */
384 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
385 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
386 return false;
388 return true;
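/* Worked example (illustration only): for a load and a store with the same
   base and offsets, equal type sizes of 4, and inits 0 and 8, the inits
   differ, so the accesses touch different locations and no dependence is
   recorded.  With equal inits, the pair is dependent unless the earlier of
   the two statements is the load.  */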
392 /* Function vect_check_interleaving.
394 Check if DRA and DRB are a part of interleaving. In case they are, insert
395 DRA and DRB in an interleaving chain. */
397 static bool
398 vect_check_interleaving (struct data_reference *dra,
399 struct data_reference *drb)
401 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
 403   /* Check that the data-refs have the same first location (except init) and
 404      that they are both either stores or loads (not a load and a store).
405 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
406 || !dr_equal_offsets_p (dra, drb)
407 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
408 || DR_IS_READ (dra) != DR_IS_READ (drb))
409 return false;
411 /* Check:
412 1. data-refs are of the same type
413 2. their steps are equal
414 3. the step (if greater than zero) is greater than the difference between
415 data-refs' inits. */
416 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
417 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
419 if (type_size_a != type_size_b
420 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
421 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
422 TREE_TYPE (DR_REF (drb))))
423 return false;
425 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
426 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
427 step = TREE_INT_CST_LOW (DR_STEP (dra));
429 if (init_a > init_b)
431 /* If init_a == init_b + the size of the type * k, we have an interleaving,
432 and DRB is accessed before DRA. */
433 diff_mod_size = (init_a - init_b) % type_size_a;
435 if (step && (init_a - init_b) > step)
436 return false;
438 if (diff_mod_size == 0)
440 vect_update_interleaving_chain (drb, dra);
441 if (dump_enabled_p ())
443 dump_printf_loc (MSG_NOTE, vect_location,
444 "Detected interleaving ");
445 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
446 dump_printf (MSG_NOTE, " and ");
447 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
449 return true;
452 else
454 /* If init_b == init_a + the size of the type * k, we have an
455 interleaving, and DRA is accessed before DRB. */
456 diff_mod_size = (init_b - init_a) % type_size_a;
458 if (step && (init_b - init_a) > step)
459 return false;
461 if (diff_mod_size == 0)
463 vect_update_interleaving_chain (dra, drb);
464 if (dump_enabled_p ())
466 dump_printf_loc (MSG_NOTE, vect_location,
467 "Detected interleaving ");
468 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
469 dump_printf (MSG_NOTE, " and ");
470 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
472 return true;
476 return false;
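/* Worked example (illustration only): for accesses a[2*i] and a[2*i+1]
   with a 4-byte element type, both have step 8, inits 0 and 4, and equal
   type sizes.  Since init_b > init_a, (init_b - init_a) == 4 <= step and
   4 % 4 == 0, the two refs form an interleaving in which DRA is accessed
   before DRB, and the chain is updated accordingly.  */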
 479 /* Check if the data references pointed to by DR_I and DR_J are the same or
 480    belong to the same interleaving group.  Return FALSE if drs are
 481    different, otherwise return TRUE.  */
483 static bool
484 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
486 gimple stmt_i = DR_STMT (dr_i);
487 gimple stmt_j = DR_STMT (dr_j);
489 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
490 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
491 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
492 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
493 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
494 return true;
495 else
496 return false;
499 /* If address ranges represented by DDR_I and DDR_J are equal,
500 return TRUE, otherwise return FALSE. */
502 static bool
503 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
505 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
506 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
507 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
508 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
509 return true;
510 else
511 return false;
514 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
515 tested at run-time. Return TRUE if DDR was successfully inserted.
516 Return false if versioning is not supported. */
518 static bool
519 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
521 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
523 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
524 return false;
526 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE, vect_location,
529 "mark for run-time aliasing test between ");
530 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
531 dump_printf (MSG_NOTE, " and ");
532 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
535 if (optimize_loop_nest_for_size_p (loop))
537 if (dump_enabled_p ())
538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
539 "versioning not supported when optimizing for size.");
540 return false;
543 /* FORNOW: We don't support versioning with outer-loop vectorization. */
544 if (loop->inner)
546 if (dump_enabled_p ())
547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
548 "versioning not yet supported for outer-loops.");
549 return false;
552 /* FORNOW: We don't support creating runtime alias tests for non-constant
553 step. */
554 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
555 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
559 "versioning not yet supported for non-constant "
560 "step");
561 return false;
564 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
565 return true;
569 /* Function vect_analyze_data_ref_dependence.
571 Return TRUE if there (might) exist a dependence between a memory-reference
 572    DRA and a memory-reference DRB.  When the dependence can instead be
 573    checked at run time by versioning for alias, return FALSE.  Adjust *MAX_VF according to
574 the data dependence. */
576 static bool
577 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
578 loop_vec_info loop_vinfo, int *max_vf)
580 unsigned int i;
581 struct loop *loop = NULL;
582 struct data_reference *dra = DDR_A (ddr);
583 struct data_reference *drb = DDR_B (ddr);
584 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
585 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
586 lambda_vector dist_v;
587 unsigned int loop_depth;
589 /* Don't bother to analyze statements marked as unvectorizable. */
590 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
591 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
592 return false;
594 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
596 /* Independent data accesses. */
597 vect_check_interleaving (dra, drb);
598 return false;
601 if (loop_vinfo)
602 loop = LOOP_VINFO_LOOP (loop_vinfo);
604 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
605 return false;
607 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
609 gimple earlier_stmt;
611 if (loop_vinfo)
613 if (dump_enabled_p ())
615 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
616 "versioning for alias required: "
617 "can't determine dependence between ");
618 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
619 DR_REF (dra));
620 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
621 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
622 DR_REF (drb));
625 /* Add to list of ddrs that need to be tested at run-time. */
626 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
 629      /* When vectorizing a basic block, an unknown dependence can still mean
 630         a grouped access.  */
631 if (vect_check_interleaving (dra, drb))
632 return false;
634 /* Read-read is OK (we need this check here, after checking for
635 interleaving). */
636 if (DR_IS_READ (dra) && DR_IS_READ (drb))
637 return false;
639 if (dump_enabled_p ())
641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
642 "can't determine dependence between ");
643 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
644 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
645 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
648 /* We do not vectorize basic blocks with write-write dependencies. */
649 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
650 return true;
652 /* Check that it's not a load-after-store dependence. */
653 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
654 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
655 return true;
657 return false;
660 /* Versioning for alias is not yet supported for basic block SLP, and
 661      dependence distance is not applicable; hence, in case of a known data
662 dependence, basic block vectorization is impossible for now. */
663 if (!loop_vinfo)
665 if (dra != drb && vect_check_interleaving (dra, drb))
666 return false;
668 if (dump_enabled_p ())
670 dump_printf_loc (MSG_NOTE, vect_location,
671 "determined dependence between ");
672 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
673 dump_printf (MSG_NOTE, " and ");
674 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
 677       /* Do not vectorize basic blocks with write-write dependences.  */
678 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
679 return true;
681 /* Check if this dependence is allowed in basic block vectorization. */
682 return vect_drs_dependent_in_basic_block (dra, drb);
685 /* Loop-based vectorization and known data dependence. */
686 if (DDR_NUM_DIST_VECTS (ddr) == 0)
688 if (dump_enabled_p ())
690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
691 "versioning for alias required: "
692 "bad dist vector for ");
693 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
694 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
695 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
697 /* Add to list of ddrs that need to be tested at run-time. */
698 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
701 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
702 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
704 int dist = dist_v[loop_depth];
706 if (dump_enabled_p ())
707 dump_printf_loc (MSG_NOTE, vect_location,
708 "dependence distance = %d.", dist);
710 if (dist == 0)
712 if (dump_enabled_p ())
714 dump_printf_loc (MSG_NOTE, vect_location,
715 "dependence distance == 0 between ");
716 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
717 dump_printf (MSG_NOTE, " and ");
718 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
721 /* For interleaving, mark that there is a read-write dependency if
 722              necessary.  We have already checked that one of the data-refs is a store.  */
723 if (DR_IS_READ (dra))
724 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
725 else
727 if (DR_IS_READ (drb))
728 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
731 continue;
734 if (dist > 0 && DDR_REVERSED_P (ddr))
736 /* If DDR_REVERSED_P the order of the data-refs in DDR was
737 reversed (to make distance vector positive), and the actual
738 distance is negative. */
739 if (dump_enabled_p ())
740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
741 "dependence distance negative.");
742 continue;
745 if (abs (dist) >= 2
746 && abs (dist) < *max_vf)
748 /* The dependence distance requires reduction of the maximal
749 vectorization factor. */
750 *max_vf = abs (dist);
751 if (dump_enabled_p ())
752 dump_printf_loc (MSG_NOTE, vect_location,
753 "adjusting maximal vectorization factor to %i",
754 *max_vf);
757 if (abs (dist) >= *max_vf)
759 /* Dependence distance does not create dependence, as far as
760 vectorization is concerned, in this case. */
761 if (dump_enabled_p ())
762 dump_printf_loc (MSG_NOTE, vect_location,
763 "dependence distance >= VF.");
764 continue;
767 if (dump_enabled_p ())
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
770 "not vectorized, possible dependence "
771 "between data-refs ");
772 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
773 dump_printf (MSG_NOTE, " and ");
774 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
777 return true;
780 return false;
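/* Illustration of the distance handling above (hypothetical numbers): with
   a dependence distance of 3 and *MAX_VF currently 8, the maximal
   vectorization factor is lowered to 3; after that the distance is >= the
   (new) *MAX_VF, so the dependence no longer prevents vectorization.  A
   distance of 0 only records a read-write dependence for interleaving.  */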
783 /* Function vect_analyze_data_ref_dependences.
785 Examine all the data references in the loop, and make sure there do not
786 exist any data dependences between them. Set *MAX_VF according to
787 the maximum vectorization factor the data dependences allow. */
789 bool
790 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
791 bb_vec_info bb_vinfo, int *max_vf)
793 unsigned int i;
794 vec<ddr_p> ddrs = vNULL;
795 struct data_dependence_relation *ddr;
797 if (dump_enabled_p ())
798 dump_printf_loc (MSG_NOTE, vect_location,
799 "=== vect_analyze_dependences ===");
800 if (loop_vinfo)
801 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
802 else
803 ddrs = BB_VINFO_DDRS (bb_vinfo);
805 FOR_EACH_VEC_ELT (ddrs, i, ddr)
806 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
807 return false;
809 return true;
813 /* Function vect_compute_data_ref_alignment
815 Compute the misalignment of the data reference DR.
817 Output:
818 1. If during the misalignment computation it is found that the data reference
819 cannot be vectorized then false is returned.
820 2. DR_MISALIGNMENT (DR) is defined.
822 FOR NOW: No analysis is actually performed. Misalignment is calculated
823 only for trivial cases. TODO. */
825 static bool
826 vect_compute_data_ref_alignment (struct data_reference *dr)
828 gimple stmt = DR_STMT (dr);
829 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
830 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
831 struct loop *loop = NULL;
832 tree ref = DR_REF (dr);
833 tree vectype;
834 tree base, base_addr;
835 bool base_aligned;
836 tree misalign;
837 tree aligned_to, alignment;
839 if (dump_enabled_p ())
840 dump_printf_loc (MSG_NOTE, vect_location,
841 "vect_compute_data_ref_alignment:");
843 if (loop_vinfo)
844 loop = LOOP_VINFO_LOOP (loop_vinfo);
846 /* Initialize misalignment to unknown. */
847 SET_DR_MISALIGNMENT (dr, -1);
849 /* Strided loads perform only component accesses, misalignment information
850 is irrelevant for them. */
851 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
852 return true;
854 misalign = DR_INIT (dr);
855 aligned_to = DR_ALIGNED_TO (dr);
856 base_addr = DR_BASE_ADDRESS (dr);
857 vectype = STMT_VINFO_VECTYPE (stmt_info);
859 /* In case the dataref is in an inner-loop of the loop that is being
860 vectorized (LOOP), we use the base and misalignment information
861 relative to the outer-loop (LOOP). This is ok only if the misalignment
862 stays the same throughout the execution of the inner-loop, which is why
 863      we have to check that the stride of the dataref in the inner-loop is a
 864      multiple of the vector size.  */
865 if (loop && nested_in_vect_loop_p (loop, stmt))
867 tree step = DR_STEP (dr);
868 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
870 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
872 if (dump_enabled_p ())
873 dump_printf_loc (MSG_NOTE, vect_location,
874 "inner step divides the vector-size.");
875 misalign = STMT_VINFO_DR_INIT (stmt_info);
876 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
877 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
879 else
881 if (dump_enabled_p ())
882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
883 "inner step doesn't divide the vector-size.");
884 misalign = NULL_TREE;
888 /* Similarly, if we're doing basic-block vectorization, we can only use
889 base and misalignment information relative to an innermost loop if the
890 misalignment stays the same throughout the execution of the loop.
 891      As above, this is the case if the stride of the dataref is a multiple of
 892      the vector size.  */
893 if (!loop)
895 tree step = DR_STEP (dr);
896 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
898 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
900 if (dump_enabled_p ())
901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
902 "SLP: step doesn't divide the vector-size.");
903 misalign = NULL_TREE;
907 base = build_fold_indirect_ref (base_addr);
908 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
910 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
911 || !misalign)
913 if (dump_enabled_p ())
915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
916 "Unknown alignment for access: ");
917 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
919 return true;
922 if ((DECL_P (base)
923 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
924 alignment) >= 0)
925 || (TREE_CODE (base_addr) == SSA_NAME
926 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
927 TREE_TYPE (base_addr)))),
928 alignment) >= 0)
929 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
930 base_aligned = true;
931 else
932 base_aligned = false;
934 if (!base_aligned)
936 /* Do not change the alignment of global variables here if
937 flag_section_anchors is enabled as we already generated
938 RTL for other functions. Most global variables should
939 have been aligned during the IPA increase_alignment pass. */
940 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
941 || (TREE_STATIC (base) && flag_section_anchors))
943 if (dump_enabled_p ())
945 dump_printf_loc (MSG_NOTE, vect_location,
946 "can't force alignment of ref: ");
947 dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
949 return true;
952 /* Force the alignment of the decl.
953 NOTE: This is the only change to the code we make during
954 the analysis phase, before deciding to vectorize the loop. */
955 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
958 dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
961 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
962 DECL_USER_ALIGN (base) = 1;
965 /* At this point we assume that the base is aligned. */
966 gcc_assert (base_aligned
967 || (TREE_CODE (base) == VAR_DECL
968 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
 970   /* If this is a backward running DR then the first access in the larger
 971      vectype actually is N-1 elements before the address in the DR.
 972      Adjust misalign accordingly.  */
973 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
975 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
976 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
977 otherwise we wouldn't be here. */
978 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
979 /* PLUS because DR_STEP was negative. */
980 misalign = size_binop (PLUS_EXPR, misalign, offset);
983 /* Modulo alignment. */
984 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
986 if (!host_integerp (misalign, 1))
988 /* Negative or overflowed misalignment value. */
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
991 "unexpected misalign value");
992 return false;
995 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
997 if (dump_enabled_p ())
999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1000 "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1001 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
1004 return true;
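/* Worked example (illustration only): for a V4SF access whose base is known
   to be 16-byte aligned and whose DR_INIT is 4, the code above computes
   misalign = 4 mod 16 = 4 bytes.  For a backward running DR the offset of
   the N-1 preceding elements is added first, so the recorded misalignment
   reflects the lowest address touched by a vector access.  */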
1008 /* Function vect_compute_data_refs_alignment
1010 Compute the misalignment of data references in the loop.
1011 Return FALSE if a data reference is found that cannot be vectorized. */
1013 static bool
1014 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
1015 bb_vec_info bb_vinfo)
1017 vec<data_reference_p> datarefs;
1018 struct data_reference *dr;
1019 unsigned int i;
1021 if (loop_vinfo)
1022 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1023 else
1024 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1026 FOR_EACH_VEC_ELT (datarefs, i, dr)
1027 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
1028 && !vect_compute_data_ref_alignment (dr))
1030 if (bb_vinfo)
1032 /* Mark unsupported statement as unvectorizable. */
1033 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1034 continue;
1036 else
1037 return false;
1040 return true;
1044 /* Function vect_update_misalignment_for_peel
1046 DR - the data reference whose misalignment is to be adjusted.
1047 DR_PEEL - the data reference whose misalignment is being made
1048 zero in the vector loop by the peel.
1049 NPEEL - the number of iterations in the peel loop if the misalignment
1050 of DR_PEEL is known at compile time. */
1052 static void
1053 vect_update_misalignment_for_peel (struct data_reference *dr,
1054 struct data_reference *dr_peel, int npeel)
1056 unsigned int i;
1057 vec<dr_p> same_align_drs;
1058 struct data_reference *current_dr;
1059 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1060 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1061 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1062 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1064 /* For interleaved data accesses the step in the loop must be multiplied by
1065 the size of the interleaving group. */
1066 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1067 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1068 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
1069 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1071 /* It can be assumed that the data refs with the same alignment as dr_peel
1072 are aligned in the vector loop. */
1073 same_align_drs
1074 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1075 FOR_EACH_VEC_ELT (same_align_drs, i, current_dr)
1077 if (current_dr != dr)
1078 continue;
1079 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1080 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1081 SET_DR_MISALIGNMENT (dr, 0);
1082 return;
1085 if (known_alignment_for_access_p (dr)
1086 && known_alignment_for_access_p (dr_peel))
1088 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1089 int misal = DR_MISALIGNMENT (dr);
1090 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1091 misal += negative ? -npeel * dr_size : npeel * dr_size;
1092 misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
1093 SET_DR_MISALIGNMENT (dr, misal);
1094 return;
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.");
1099 SET_DR_MISALIGNMENT (dr, -1);
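/* Worked example (illustration only): for a forward DR with element size 4,
   a known misalignment of 4 bytes and a 16-byte vector alignment, peeling
   NPEEL = 3 iterations gives misal = 4 + 3 * 4 = 16, which masked with 15
   becomes 0, i.e. the DR ends up aligned in the vectorized loop.  */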
1103 /* Function vect_verify_datarefs_alignment
1105 Return TRUE if all data references in the loop can be
1106 handled with respect to alignment. */
1108 bool
1109 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1111 vec<data_reference_p> datarefs;
1112 struct data_reference *dr;
1113 enum dr_alignment_support supportable_dr_alignment;
1114 unsigned int i;
1116 if (loop_vinfo)
1117 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1118 else
1119 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1121 FOR_EACH_VEC_ELT (datarefs, i, dr)
1123 gimple stmt = DR_STMT (dr);
1124 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1126 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1127 continue;
1129 /* For interleaving, only the alignment of the first access matters.
1130 Skip statements marked as not vectorizable. */
1131 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
1132 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1133 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1134 continue;
1136 /* Strided loads perform only component accesses, alignment is
1137 irrelevant for them. */
1138 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1139 continue;
1141 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1142 if (!supportable_dr_alignment)
1144 if (dump_enabled_p ())
1146 if (DR_IS_READ (dr))
1147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1148 "not vectorized: unsupported unaligned load.");
1149 else
1150 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1151 "not vectorized: unsupported unaligned "
1152 "store.");
1154 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1155 DR_REF (dr));
1157 return false;
1159 if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
1160 dump_printf_loc (MSG_NOTE, vect_location,
1161 "Vectorizing an unaligned access.");
1163 return true;
 1166 /* Given a memory reference EXP, return whether its alignment is less
 1167    than its size.  */
1169 static bool
1170 not_size_aligned (tree exp)
1172 if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp)), 1))
1173 return true;
1175 return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
1176 > get_object_alignment (exp));
1179 /* Function vector_alignment_reachable_p
1181 Return true if vector alignment for DR is reachable by peeling
1182 a few loop iterations. Return false otherwise. */
1184 static bool
1185 vector_alignment_reachable_p (struct data_reference *dr)
1187 gimple stmt = DR_STMT (dr);
1188 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1189 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1191 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
 1193      /* For interleaved accesses we peel only if the number of iterations in
 1194         the prolog loop ({VF - misalignment}) is a multiple of the
 1195         number of interleaved accesses.  */
1196 int elem_size, mis_in_elements;
1197 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1199 /* FORNOW: handle only known alignment. */
1200 if (!known_alignment_for_access_p (dr))
1201 return false;
1203 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1204 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1206 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1207 return false;
 1210   /* If misalignment is known at compile time then allow peeling
1211 only if natural alignment is reachable through peeling. */
1212 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1214 HOST_WIDE_INT elmsize =
1215 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1216 if (dump_enabled_p ())
1218 dump_printf_loc (MSG_NOTE, vect_location,
1219 "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1220 dump_printf (MSG_NOTE,
1221 ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1223 if (DR_MISALIGNMENT (dr) % elmsize)
1225 if (dump_enabled_p ())
1226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1227 "data size does not divide the misalignment.\n");
1228 return false;
1232 if (!known_alignment_for_access_p (dr))
1234 tree type = TREE_TYPE (DR_REF (dr));
1235 bool is_packed = not_size_aligned (DR_REF (dr));
1236 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1238 "Unknown misalignment, is_packed = %d",is_packed);
1239 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1240 return true;
1241 else
1242 return false;
1245 return true;
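/* Worked example (illustration only): for a V4SI access (4 elements of 4
   bytes) with a known misalignment of 8 bytes, mis_in_elements is 2.  For a
   group of size 3, (4 - 2) % 3 != 0, so peeling cannot align the whole
   group and false is returned; with a group of size 2 the prologue of
   (VF - misalignment) iterations works out and peeling is considered.  */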
1249 /* Calculate the cost of the memory access represented by DR. */
1251 static void
1252 vect_get_data_access_cost (struct data_reference *dr,
1253 unsigned int *inside_cost,
1254 unsigned int *outside_cost,
1255 stmt_vector_for_cost *body_cost_vec)
1257 gimple stmt = DR_STMT (dr);
1258 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1259 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1260 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1261 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1262 int ncopies = vf / nunits;
1264 if (DR_IS_READ (dr))
1265 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1266 NULL, body_cost_vec, false);
1267 else
1268 vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1270 if (dump_enabled_p ())
1271 dump_printf_loc (MSG_NOTE, vect_location,
1272 "vect_get_data_access_cost: inside_cost = %d, "
1273 "outside_cost = %d.", *inside_cost, *outside_cost);
1277 static hashval_t
1278 vect_peeling_hash (const void *elem)
1280 const struct _vect_peel_info *peel_info;
1282 peel_info = (const struct _vect_peel_info *) elem;
1283 return (hashval_t) peel_info->npeel;
1287 static int
1288 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1290 const struct _vect_peel_info *a, *b;
1292 a = (const struct _vect_peel_info *) elem1;
1293 b = (const struct _vect_peel_info *) elem2;
1294 return (a->npeel == b->npeel);
1298 /* Insert DR into peeling hash table with NPEEL as key. */
1300 static void
1301 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1302 int npeel)
1304 struct _vect_peel_info elem, *slot;
1305 void **new_slot;
1306 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1308 elem.npeel = npeel;
1309 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1310 &elem);
1311 if (slot)
1312 slot->count++;
1313 else
1315 slot = XNEW (struct _vect_peel_info);
1316 slot->npeel = npeel;
1317 slot->dr = dr;
1318 slot->count = 1;
1319 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1320 INSERT);
1321 *new_slot = slot;
1324 if (!supportable_dr_alignment && !flag_vect_cost_model)
1325 slot->count += VECT_MAX_COST;
 1329 /* Traverse the peeling hash table to find the peeling option that aligns
 1330    the maximum number of data accesses.  */
1332 static int
1333 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1335 vect_peel_info elem = (vect_peel_info) *slot;
1336 vect_peel_extended_info max = (vect_peel_extended_info) data;
1338 if (elem->count > max->peel_info.count
1339 || (elem->count == max->peel_info.count
1340 && max->peel_info.npeel > elem->npeel))
1342 max->peel_info.npeel = elem->npeel;
1343 max->peel_info.count = elem->count;
1344 max->peel_info.dr = elem->dr;
1347 return 1;
1351 /* Traverse peeling hash table and calculate cost for each peeling option.
1352 Find the one with the lowest cost. */
1354 static int
1355 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1357 vect_peel_info elem = (vect_peel_info) *slot;
1358 vect_peel_extended_info min = (vect_peel_extended_info) data;
1359 int save_misalignment, dummy;
1360 unsigned int inside_cost = 0, outside_cost = 0, i;
1361 gimple stmt = DR_STMT (elem->dr);
1362 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1363 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1364 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1365 struct data_reference *dr;
1366 stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
1367 int single_iter_cost;
1369 prologue_cost_vec.create (2);
1370 body_cost_vec.create (2);
1371 epilogue_cost_vec.create (2);
1373 FOR_EACH_VEC_ELT (datarefs, i, dr)
1375 stmt = DR_STMT (dr);
1376 stmt_info = vinfo_for_stmt (stmt);
1377 /* For interleaving, only the alignment of the first access
1378 matters. */
1379 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1380 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1381 continue;
1383 save_misalignment = DR_MISALIGNMENT (dr);
1384 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1385 vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
1386 &body_cost_vec);
1387 SET_DR_MISALIGNMENT (dr, save_misalignment);
1390 single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
1391 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
1392 &dummy, single_iter_cost,
1393 &prologue_cost_vec,
1394 &epilogue_cost_vec);
1396 /* Prologue and epilogue costs are added to the target model later.
1397 These costs depend only on the scalar iteration cost, the
1398 number of peeling iterations finally chosen, and the number of
1399 misaligned statements. So discard the information found here. */
1400 prologue_cost_vec.release ();
1401 epilogue_cost_vec.release ();
1403 if (inside_cost < min->inside_cost
1404 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1406 min->inside_cost = inside_cost;
1407 min->outside_cost = outside_cost;
1408 min->body_cost_vec.release ();
1409 min->body_cost_vec = body_cost_vec;
1410 min->peel_info.dr = elem->dr;
1411 min->peel_info.npeel = elem->npeel;
1413 else
1414 body_cost_vec.release ();
1416 return 1;
1420 /* Choose best peeling option by traversing peeling hash table and either
1421 choosing an option with the lowest cost (if cost model is enabled) or the
1422 option that aligns as many accesses as possible. */
1424 static struct data_reference *
1425 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1426 unsigned int *npeel,
1427 stmt_vector_for_cost *body_cost_vec)
1429 struct _vect_peel_extended_info res;
1431 res.peel_info.dr = NULL;
1432 res.body_cost_vec = stmt_vector_for_cost();
1434 if (flag_vect_cost_model)
1436 res.inside_cost = INT_MAX;
1437 res.outside_cost = INT_MAX;
1438 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1439 vect_peeling_hash_get_lowest_cost, &res);
1441 else
1443 res.peel_info.count = 0;
1444 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1445 vect_peeling_hash_get_most_frequent, &res);
1448 *npeel = res.peel_info.npeel;
1449 *body_cost_vec = res.body_cost_vec;
1450 return res.peel_info.dr;
1454 /* Function vect_enhance_data_refs_alignment
1456 This pass will use loop versioning and loop peeling in order to enhance
1457 the alignment of data references in the loop.
1459 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1460 original loop is to be vectorized. Any other loops that are created by
 1461    the transformations performed in this pass are not supposed to be
1462 vectorized. This restriction will be relaxed.
1464 This pass will require a cost model to guide it whether to apply peeling
1465 or versioning or a combination of the two. For example, the scheme that
 1466    Intel uses when given a loop with several memory accesses is as follows:
 1467    choose one memory access ('p') whose alignment you want to force by doing
1468 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1469 other accesses are not necessarily aligned, or (2) use loop versioning to
1470 generate one loop in which all accesses are aligned, and another loop in
1471 which only 'p' is necessarily aligned.
1473 ("Automatic Intra-Register Vectorization for the Intel Architecture",
 1474    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1475 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1477 Devising a cost model is the most critical aspect of this work. It will
1478 guide us on which access to peel for, whether to use loop versioning, how
1479 many versions to create, etc. The cost model will probably consist of
1480 generic considerations as well as target specific considerations (on
1481 powerpc for example, misaligned stores are more painful than misaligned
1482 loads).
1484 Here are the general steps involved in alignment enhancements:
1486 -- original loop, before alignment analysis:
1487 for (i=0; i<N; i++){
1488 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1489 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1492 -- After vect_compute_data_refs_alignment:
1493 for (i=0; i<N; i++){
1494 x = q[i]; # DR_MISALIGNMENT(q) = 3
1495 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1498 -- Possibility 1: we do loop versioning:
1499 if (p is aligned) {
1500 for (i=0; i<N; i++){ # loop 1A
1501 x = q[i]; # DR_MISALIGNMENT(q) = 3
1502 p[i] = y; # DR_MISALIGNMENT(p) = 0
1505 else {
1506 for (i=0; i<N; i++){ # loop 1B
1507 x = q[i]; # DR_MISALIGNMENT(q) = 3
1508 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1512 -- Possibility 2: we do loop peeling:
1513 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1514 x = q[i];
1515 p[i] = y;
1517 for (i = 3; i < N; i++){ # loop 2A
1518 x = q[i]; # DR_MISALIGNMENT(q) = 0
1519 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1522 -- Possibility 3: combination of loop peeling and versioning:
1523 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1524 x = q[i];
1525 p[i] = y;
1527 if (p is aligned) {
1528 for (i = 3; i<N; i++){ # loop 3A
1529 x = q[i]; # DR_MISALIGNMENT(q) = 0
1530 p[i] = y; # DR_MISALIGNMENT(p) = 0
1533 else {
1534 for (i = 3; i<N; i++){ # loop 3B
1535 x = q[i]; # DR_MISALIGNMENT(q) = 0
1536 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1540 These loops are later passed to loop_transform to be vectorized. The
1541 vectorizer will use the alignment information to guide the transformation
1542 (whether to generate regular loads/stores, or with special handling for
1543 misalignment). */
1545 bool
1546 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1548 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1549 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1550 enum dr_alignment_support supportable_dr_alignment;
1551 struct data_reference *dr0 = NULL, *first_store = NULL;
1552 struct data_reference *dr;
1553 unsigned int i, j;
1554 bool do_peeling = false;
1555 bool do_versioning = false;
1556 bool stat;
1557 gimple stmt;
1558 stmt_vec_info stmt_info;
1559 int vect_versioning_for_alias_required;
1560 unsigned int npeel = 0;
1561 bool all_misalignments_unknown = true;
1562 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1563 unsigned possible_npeel_number = 1;
1564 tree vectype;
1565 unsigned int nelements, mis, same_align_drs_max = 0;
1566 stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost();
1568 if (dump_enabled_p ())
1569 dump_printf_loc (MSG_NOTE, vect_location,
1570 "=== vect_enhance_data_refs_alignment ===");
1572 /* While cost model enhancements are expected in the future, the high level
1573 view of the code at this time is as follows:
1575 A) If there is a misaligned access then see if peeling to align
1576 this access can make all data references satisfy
1577 vect_supportable_dr_alignment. If so, update data structures
1578 as needed and return true.
1580 B) If peeling wasn't possible and there is a data reference with an
1581 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1582 then see if loop versioning checks can be used to make all data
1583 references satisfy vect_supportable_dr_alignment. If so, update
1584 data structures as needed and return true.
1586 C) If neither peeling nor versioning were successful then return false if
1587 any data reference does not satisfy vect_supportable_dr_alignment.
1589 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1591 Note, Possibility 3 above (which is peeling and versioning together) is not
1592 being done at this time. */
1594 /* (1) Peeling to force alignment. */
1596 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1597 Considerations:
1598 + How many accesses will become aligned due to the peeling
1599 - How many accesses will become unaligned due to the peeling,
1600 and the cost of misaligned accesses.
1601 - The cost of peeling (the extra runtime checks, the increase
1602 in code size). */
1604 FOR_EACH_VEC_ELT (datarefs, i, dr)
1606 stmt = DR_STMT (dr);
1607 stmt_info = vinfo_for_stmt (stmt);
1609 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1610 continue;
1612 /* For interleaving, only the alignment of the first access
1613 matters. */
1614 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1615 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1616 continue;
1618 /* FORNOW: Any strided load prevents peeling. The induction
1619 variable analysis will fail when the prologue loop is generated,
1620 and so we can't generate the new base for the pointer. */
1621 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1623 if (dump_enabled_p ())
1624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1625 "strided load prevents peeling");
1626 do_peeling = false;
1627 break;
1630 /* For invariant accesses there is nothing to enhance. */
1631 if (integer_zerop (DR_STEP (dr)))
1632 continue;
1634 /* Strided loads perform only component accesses, alignment is
1635 irrelevant for them. */
1636 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1637 continue;
1639 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1640 do_peeling = vector_alignment_reachable_p (dr);
1641 if (do_peeling)
1643 if (known_alignment_for_access_p (dr))
1645 unsigned int npeel_tmp;
1646 bool negative = tree_int_cst_compare (DR_STEP (dr),
1647 size_zero_node) < 0;
1649 /* Save info about DR in the hash table. */
1650 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1651 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1652 htab_create (1, vect_peeling_hash,
1653 vect_peeling_hash_eq, free);
1655 vectype = STMT_VINFO_VECTYPE (stmt_info);
1656 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1657 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1658 TREE_TYPE (DR_REF (dr))));
1659 npeel_tmp = (negative
1660 ? (mis - nelements) : (nelements - mis))
1661 & (nelements - 1);
1663 /* For multiple types, it is possible that the bigger type access
1664 will have more than one peeling option. E.g., a loop with two
1665 types: one of size (vector size / 4), and the other one of
 1666                 size (vector size / 8).  The vectorization factor will be 8.  If both
 1667                 accesses are misaligned by 3, the first one needs one scalar
 1668                 iteration to be aligned, and the second one needs 5.  But
 1669                 the first one will also be aligned by peeling 5 scalar
 1670                 iterations, and in that case both accesses will be aligned.
 1671                 Hence, except for the immediate peeling amount, we also want
 1672                 to try adding a full vector size, as long as we don't exceed the
 1673                 vectorization factor.
 1674                 We do this automatically for the cost model, since we calculate
 1675                 the cost for every peeling option.  */
1676 if (!flag_vect_cost_model)
 1677                possible_npeel_number = vf / nelements;
1679 /* Handle the aligned case. We may decide to align some other
1680 access, making DR unaligned. */
1681 if (DR_MISALIGNMENT (dr) == 0)
1683 npeel_tmp = 0;
1684 if (!flag_vect_cost_model)
1685 possible_npeel_number++;
1688 for (j = 0; j < possible_npeel_number; j++)
1690 gcc_assert (npeel_tmp <= vf);
1691 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1692 npeel_tmp += nelements;
1695 all_misalignments_unknown = false;
1696 /* Data-ref that was chosen for the case that all the
1697 misalignments are unknown is not relevant anymore, since we
1698 have a data-ref with known alignment. */
1699 dr0 = NULL;
1701 else
1703 /* If we don't know all the misalignment values, we prefer
1704 peeling for data-ref that has maximum number of data-refs
1705 with the same alignment, unless the target prefers to align
 1706                 stores over loads.  */
1707 if (all_misalignments_unknown)
1709 if (same_align_drs_max
1710 < STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ()
1711 || !dr0)
1713 same_align_drs_max
1714 = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
1715 dr0 = dr;
1718 if (!first_store && DR_IS_WRITE (dr))
1719 first_store = dr;
1722 /* If there are both known and unknown misaligned accesses in the
1723 loop, we choose peeling amount according to the known
1724 accesses. */
1727 if (!supportable_dr_alignment)
1729 dr0 = dr;
1730 if (!first_store && DR_IS_WRITE (dr))
1731 first_store = dr;
1735 else
1737 if (!aligned_access_p (dr))
1739 if (dump_enabled_p ())
1740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1741 "vector alignment may not be reachable");
1742 break;
1747 vect_versioning_for_alias_required
1748 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1750 /* Temporarily, if versioning for alias is required, we disable peeling
1751 until we support peeling and versioning. Often peeling for alignment
1752 will require peeling for loop-bound, which in turn requires that we
1753 know how to adjust the loop ivs after the loop. */
1754 if (vect_versioning_for_alias_required
1755 || !vect_can_advance_ivs_p (loop_vinfo)
1756 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1757 do_peeling = false;
1759 if (do_peeling && all_misalignments_unknown
1760 && vect_supportable_dr_alignment (dr0, false))
 1763      /* Check if the target prefers to align stores over loads, i.e., if
1764 misaligned stores are more expensive than misaligned loads (taking
1765 drs with same alignment into account). */
1766 if (first_store && DR_IS_READ (dr0))
1768 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1769 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1770 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1771 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1772 stmt_vector_for_cost dummy;
1773 dummy.create (2);
1775 vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1776 &dummy);
1777 vect_get_data_access_cost (first_store, &store_inside_cost,
1778 &store_outside_cost, &dummy);
1780 dummy.release ();
1782 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1783 aligning the load DR0). */
1784 load_inside_penalty = store_inside_cost;
1785 load_outside_penalty = store_outside_cost;
1786 for (i = 0;
1787 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1788 DR_STMT (first_store))).iterate (i, &dr);
1789 i++)
1790 if (DR_IS_READ (dr))
1792 load_inside_penalty += load_inside_cost;
1793 load_outside_penalty += load_outside_cost;
1795 else
1797 load_inside_penalty += store_inside_cost;
1798 load_outside_penalty += store_outside_cost;
1801 /* Calculate the penalty for leaving DR0 unaligned (by
1802 aligning the FIRST_STORE). */
1803 store_inside_penalty = load_inside_cost;
1804 store_outside_penalty = load_outside_cost;
1805 for (i = 0;
1806 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1807 DR_STMT (dr0))).iterate (i, &dr);
1808 i++)
1809 if (DR_IS_READ (dr))
1811 store_inside_penalty += load_inside_cost;
1812 store_outside_penalty += load_outside_cost;
1814 else
1816 store_inside_penalty += store_inside_cost;
1817 store_outside_penalty += store_outside_cost;
1820 if (load_inside_penalty > store_inside_penalty
1821 || (load_inside_penalty == store_inside_penalty
1822 && load_outside_penalty > store_outside_penalty))
1823 dr0 = first_store;
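/* E.g. (illustrative costs only): if leaving FIRST_STORE unaligned would
   cost 6 inside the loop while leaving DR0 unaligned would cost only 4,
   the comparison above picks the store as the access that peeling will
   align. */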
1826 /* In case there are only loads with different unknown misalignments, use
1827 peeling only if it may help to align other accesses in the loop. */
1828 if (!first_store
1829 && !STMT_VINFO_SAME_ALIGN_REFS (
1830 vinfo_for_stmt (DR_STMT (dr0))).length ()
1831 && vect_supportable_dr_alignment (dr0, false)
1832 != dr_unaligned_supported)
1833 do_peeling = false;
1836 if (do_peeling && !dr0)
1838 /* Peeling is possible, but there is no data access whose vectorization
1839 requires alignment. So we try to choose the best possible peeling. */
1841 /* We should get here only if there are drs with known misalignment. */
1842 gcc_assert (!all_misalignments_unknown);
1844 /* Choose the best peeling from the hash table. */
1845 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
1846 &body_cost_vec);
1847 if (!dr0 || !npeel)
1848 do_peeling = false;
1851 if (do_peeling)
1853 stmt = DR_STMT (dr0);
1854 stmt_info = vinfo_for_stmt (stmt);
1855 vectype = STMT_VINFO_VECTYPE (stmt_info);
1856 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1858 if (known_alignment_for_access_p (dr0))
1860 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1861 size_zero_node) < 0;
1862 if (!npeel)
1864 /* Since it's known at compile time, compute the number of
1865 iterations in the peeled loop (the peeling factor) for use in
1866 updating DR_MISALIGNMENT values. The peeling factor is the
1867 vectorization factor minus the misalignment as an element
1868 count. */
1869 mis = DR_MISALIGNMENT (dr0);
1870 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1871 npeel = ((negative ? mis - nelements : nelements - mis)
1872 & (nelements - 1));
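/* Worked example (illustrative): for a 4-element vector of 4-byte ints
   with DR_MISALIGNMENT (dr0) == 8, mis becomes 8 / 4 = 2 elements and,
   with a positive step, npeel = (4 - 2) & 3 = 2 scalar iterations. */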
1875 /* For interleaved data access every iteration accesses all the
1876 members of the group, therefore we divide the number of iterations
1877 by the group size. */
1878 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1879 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1880 npeel /= GROUP_SIZE (stmt_info);
1882 if (dump_enabled_p ())
1883 dump_printf_loc (MSG_NOTE, vect_location,
1884 "Try peeling by %d", npeel);
1887 /* Ensure that all data refs can be vectorized after the peel. */
1888 FOR_EACH_VEC_ELT (datarefs, i, dr)
1890 int save_misalignment;
1892 if (dr == dr0)
1893 continue;
1895 stmt = DR_STMT (dr);
1896 stmt_info = vinfo_for_stmt (stmt);
1897 /* For interleaving, only the alignment of the first access
1898 matters. */
1899 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1900 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1901 continue;
1903 /* Strided loads perform only component accesses; alignment is
1904 irrelevant for them. */
1905 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1906 continue;
1908 save_misalignment = DR_MISALIGNMENT (dr);
1909 vect_update_misalignment_for_peel (dr, dr0, npeel);
1910 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1911 SET_DR_MISALIGNMENT (dr, save_misalignment);
1913 if (!supportable_dr_alignment)
1915 do_peeling = false;
1916 break;
1920 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1922 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1923 if (!stat)
1924 do_peeling = false;
1925 else
1927 body_cost_vec.release ();
1928 return stat;
1932 if (do_peeling)
1934 stmt_info_for_cost *si;
1935 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
1937 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1938 If the misalignment of DR_i is identical to that of dr0 then set
1939 DR_MISALIGNMENT (DR_i) to zero. If the misalignments of DR_i and
1940 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1941 by the peeling factor times the element size of DR_i (MOD the
1942 vectorization factor times the size). Otherwise, the
1943 misalignment of DR_i must be set to unknown. */
1944 FOR_EACH_VEC_ELT (datarefs, i, dr)
1945 if (dr != dr0)
1946 vect_update_misalignment_for_peel (dr, dr0, npeel);
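/* For instance (illustrative numbers): with npeel = 2, vf = 4 and 4-byte
   elements, a DR_i whose known misalignment was 4 becomes
   (4 + 2 * 4) % (4 * 4) = 12 bytes, while a DR_i with unknown
   misalignment stays unknown. */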
1948 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1949 if (npeel)
1950 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1951 else
1952 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1953 SET_DR_MISALIGNMENT (dr0, 0);
1954 if (dump_enabled_p ())
1956 dump_printf_loc (MSG_NOTE, vect_location,
1957 "Alignment of access forced using peeling.");
1958 dump_printf_loc (MSG_NOTE, vect_location,
1959 "Peeling for alignment will be applied.");
1961 /* We've delayed passing the inside-loop peeling costs to the
1962 target cost model until we were sure peeling would happen.
1963 Do so now. */
1964 if (body_cost_vec.exists ())
1966 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1968 struct _stmt_vec_info *stmt_info
1969 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1970 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1971 si->misalign, vect_body);
1973 body_cost_vec.release ();
1976 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1977 gcc_assert (stat);
1978 return stat;
1982 body_cost_vec.release ();
1984 /* (2) Versioning to force alignment. */
1986 /* Try versioning if:
1987 1) flag_tree_vect_loop_version is TRUE
1988 2) optimize loop for speed
1989 3) there is at least one unsupported misaligned data ref with an unknown
1990 misalignment, and
1991 4) all misaligned data refs with a known misalignment are supported, and
1992 5) the number of runtime alignment checks is within reason. */
1994 do_versioning =
1995 flag_tree_vect_loop_version
1996 && optimize_loop_nest_for_speed_p (loop)
1997 && (!loop->inner); /* FORNOW */
1999 if (do_versioning)
2001 FOR_EACH_VEC_ELT (datarefs, i, dr)
2003 stmt = DR_STMT (dr);
2004 stmt_info = vinfo_for_stmt (stmt);
2006 /* For interleaving, only the alignment of the first access
2007 matters. */
2008 if (aligned_access_p (dr)
2009 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
2010 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
2011 continue;
2013 /* Strided loads perform only component accesses; alignment is
2014 irrelevant for them. */
2015 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
2016 continue;
2018 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
2020 if (!supportable_dr_alignment)
2022 gimple stmt;
2023 int mask;
2024 tree vectype;
2026 if (known_alignment_for_access_p (dr)
2027 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
2028 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2030 do_versioning = false;
2031 break;
2034 stmt = DR_STMT (dr);
2035 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2036 gcc_assert (vectype);
2038 /* The rightmost bits of an aligned address must be zeros.
2039 Construct the mask needed for this test. For example,
2040 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2041 mask must be 15 = 0xf. */
2042 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2044 /* FORNOW: use the same mask to test all potentially unaligned
2045 references in the loop. The vectorizer currently supports
2046 a single vector size, see the reference to
2047 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2048 vectorization factor is computed. */
2049 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2050 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2051 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2052 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
2053 DR_STMT (dr));
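/* The statements collected here feed the runtime alignment check that
   guards the vectorized loop version; conceptually (a sketch, not the
   exact code generated elsewhere) the test is roughly
     if ((((uintptr_t) addr_1 | ... | (uintptr_t) addr_n) & mask) == 0)
       ... take the vectorized loop ...
     else
       ... take the scalar loop ...  */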
2057 /* Versioning requires at least one misaligned data reference. */
2058 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2059 do_versioning = false;
2060 else if (!do_versioning)
2061 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
2064 if (do_versioning)
2066 vec<gimple> may_misalign_stmts
2067 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2068 gimple stmt;
2070 /* It can now be assumed that the data references in the statements
2071 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2072 of the loop being vectorized. */
2073 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
2075 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2076 dr = STMT_VINFO_DATA_REF (stmt_info);
2077 SET_DR_MISALIGNMENT (dr, 0);
2078 if (dump_enabled_p ())
2079 dump_printf_loc (MSG_NOTE, vect_location,
2080 "Alignment of access forced using versioning.");
2083 if (dump_enabled_p ())
2084 dump_printf_loc (MSG_NOTE, vect_location,
2085 "Versioning for alignment will be applied.");
2087 /* Peeling and versioning can't be done together at this time. */
2088 gcc_assert (! (do_peeling && do_versioning));
2090 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2091 gcc_assert (stat);
2092 return stat;
2095 /* This point is reached if neither peeling nor versioning is being done. */
2096 gcc_assert (! (do_peeling || do_versioning));
2098 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2099 return stat;
2103 /* Function vect_find_same_alignment_drs.
2105 Update group and alignment relations according to the chosen
2106 vectorization factor. */
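/* For instance (illustrative): for the accesses a[i] and a[i - 4] with
   4-byte elements and a vectorization factor of 4, the dependence
   distance is 4 and 4 % vf == 0, so the two data-refs are recorded in
   each other's STMT_VINFO_SAME_ALIGN_REFS lists below. */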
2108 static void
2109 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
2110 loop_vec_info loop_vinfo)
2112 unsigned int i;
2113 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2114 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2115 struct data_reference *dra = DDR_A (ddr);
2116 struct data_reference *drb = DDR_B (ddr);
2117 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2118 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2119 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
2120 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
2121 lambda_vector dist_v;
2122 unsigned int loop_depth;
2124 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2125 return;
2127 if (dra == drb)
2128 return;
2130 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2131 return;
2133 /* Loop-based vectorization and known data dependence. */
2134 if (DDR_NUM_DIST_VECTS (ddr) == 0)
2135 return;
2137 /* Data-dependence analysis reports a distance vector of zero
2138 for data-references that overlap only in the first iteration
2139 but have steps of different sign (see PR45764).
2140 So as a sanity check require equal DR_STEP. */
2141 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2142 return;
2144 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
2145 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
2147 int dist = dist_v[loop_depth];
2149 if (dump_enabled_p ())
2150 dump_printf_loc (MSG_NOTE, vect_location,
2151 "dependence distance = %d.", dist);
2153 /* Same loop iteration. */
2154 if (dist == 0
2155 || (dist % vectorization_factor == 0 && dra_size == drb_size))
2157 /* Two references with distance zero, or a distance that is a multiple of the vectorization factor (with equal element size), have the same alignment. */
2158 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
2159 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
2160 if (dump_enabled_p ())
2162 dump_printf_loc (MSG_NOTE, vect_location,
2163 "accesses have the same alignment.");
2164 dump_printf (MSG_NOTE,
2165 "dependence distance modulo vf == 0 between ");
2166 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2167 dump_printf (MSG_NOTE, " and ");
2168 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
2175 /* Function vect_analyze_data_refs_alignment
2177 Analyze the alignment of the data-references in the loop.
2178 Return FALSE if a data reference is found that cannot be vectorized. */
2180 bool
2181 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2182 bb_vec_info bb_vinfo)
2184 if (dump_enabled_p ())
2185 dump_printf_loc (MSG_NOTE, vect_location,
2186 "=== vect_analyze_data_refs_alignment ===");
2188 /* Mark groups of data references with same alignment using
2189 data dependence information. */
2190 if (loop_vinfo)
2192 vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2193 struct data_dependence_relation *ddr;
2194 unsigned int i;
2196 FOR_EACH_VEC_ELT (ddrs, i, ddr)
2197 vect_find_same_alignment_drs (ddr, loop_vinfo);
2200 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2202 if (dump_enabled_p ())
2203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2204 "not vectorized: can't calculate alignment "
2205 "for data ref.");
2206 return false;
2209 return true;
2213 /* Analyze groups of accesses: check that DR belongs to a group of
2214 accesses of legal size, step, etc. Detect gaps, single element
2215 interleaving, and other special cases. Set grouped access info.
2216 Collect groups of strided stores for further use in SLP analysis. */
2218 static bool
2219 vect_analyze_group_access (struct data_reference *dr)
2221 tree step = DR_STEP (dr);
2222 tree scalar_type = TREE_TYPE (DR_REF (dr));
2223 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2224 gimple stmt = DR_STMT (dr);
2225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2226 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2227 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2228 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2229 HOST_WIDE_INT groupsize, last_accessed_element = 1;
2230 bool slp_impossible = false;
2231 struct loop *loop = NULL;
2233 if (loop_vinfo)
2234 loop = LOOP_VINFO_LOOP (loop_vinfo);
2236 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2237 size of the interleaving group (including gaps). */
2238 groupsize = dr_step / type_size;
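/* E.g. (illustrative): for the access pattern a[4*i], a[4*i + 2] over
   4-byte ints, DR_STEP is 16, type_size is 4 and groupsize is 4
   (two members of the group plus two gaps). */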
2240 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2241 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2243 /* Check if this DR is a part of interleaving, and is a single
2244 element of the group that is accessed in the loop. */
2246 /* Gaps are supported only for loads. STEP must be a multiple of the type
2247 size. The size of the group must be a power of 2. */
2248 if (DR_IS_READ (dr)
2249 && (dr_step % type_size) == 0
2250 && groupsize > 0
2251 && exact_log2 (groupsize) != -1)
2253 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2254 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2255 if (dump_enabled_p ())
2257 dump_printf_loc (MSG_NOTE, vect_location,
2258 "Detected single element interleaving ");
2259 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2260 dump_printf (MSG_NOTE, " step ");
2261 dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2264 if (loop_vinfo)
2266 if (dump_enabled_p ())
2267 dump_printf_loc (MSG_NOTE, vect_location,
2268 "Data access with gaps requires scalar "
2269 "epilogue loop");
2270 if (loop->inner)
2272 if (dump_enabled_p ())
2273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2274 "Peeling for outer loop is not"
2275 " supported");
2276 return false;
2279 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2282 return true;
2285 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2288 "not consecutive access ");
2289 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2292 if (bb_vinfo)
2294 /* Mark the statement as unvectorizable. */
2295 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2296 return true;
2299 return false;
2302 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2304 /* First stmt in the interleaving chain. Check the chain. */
2305 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2306 struct data_reference *data_ref = dr;
2307 unsigned int count = 1;
2308 tree next_step;
2309 tree prev_init = DR_INIT (data_ref);
2310 gimple prev = stmt;
2311 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2313 while (next)
2315 /* Skip same data-refs. If two or more stmts share a
2316 data-ref (supported only for loads), we vectorize only the first
2317 stmt, and the rest get their vectorized loads from the first
2318 one. */
2319 if (!tree_int_cst_compare (DR_INIT (data_ref),
2320 DR_INIT (STMT_VINFO_DATA_REF (
2321 vinfo_for_stmt (next)))))
2323 if (DR_IS_WRITE (data_ref))
2325 if (dump_enabled_p ())
2326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2327 "Two store stmts share the same dr.");
2328 return false;
2331 /* Check that there are no load-store dependences for these loads,
2332 to prevent a load-store-load sequence to the same location. */
2333 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2334 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2336 if (dump_enabled_p ())
2337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2338 "READ_WRITE dependence in interleaving.");
2339 return false;
2342 /* For loads, reuse the same data-ref load. */
2343 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2345 prev = next;
2346 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2347 continue;
2350 prev = next;
2352 /* Check that all the accesses have the same STEP. */
2353 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2354 if (tree_int_cst_compare (step, next_step))
2356 if (dump_enabled_p ())
2357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2358 "not consecutive access in interleaving");
2359 return false;
2362 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2363 /* Check that the distance between two accesses is equal to the type
2364 size. Otherwise, we have gaps. */
2365 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2366 - TREE_INT_CST_LOW (prev_init)) / type_size;
2367 if (diff != 1)
2369 /* FORNOW: SLP of accesses with gaps is not supported. */
2370 slp_impossible = true;
2371 if (DR_IS_WRITE (data_ref))
2373 if (dump_enabled_p ())
2374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2375 "interleaved store with gaps");
2376 return false;
2379 gaps += diff - 1;
2382 last_accessed_element += diff;
2384 /* Store the gap from the previous member of the group. If there is no
2385 gap in the access, GROUP_GAP is always 1. */
2386 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2388 prev_init = DR_INIT (data_ref);
2389 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2390 /* Count the number of data-refs in the chain. */
2391 count++;
2394 /* COUNT is the number of accesses found; we multiply it by the size of
2395 the type to get COUNT_IN_BYTES. */
2396 count_in_bytes = type_size * count;
2398 /* Check that the size of the interleaving (including gaps) is not
2399 greater than STEP. */
2400 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2402 if (dump_enabled_p ())
2404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2405 "interleaving size is greater than step for ");
2406 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dr));
2408 return false;
2411 /* Check that the size of the interleaving is equal to STEP for stores,
2412 i.e., that there are no gaps. */
2413 if (dr_step && dr_step != count_in_bytes)
2415 if (DR_IS_READ (dr))
2417 slp_impossible = true;
2418 /* There is a gap after the last load in the group. This gap is the
2419 difference between the groupsize and the number of elements.
2420 When there is no gap, this difference should be 0. */
2421 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
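/* E.g. (illustrative): for loads a[4*i], a[4*i + 1], a[4*i + 2] of 4-byte
   ints, dr_step is 16, count_in_bytes is 12, groupsize is 4 and count is
   3, so GROUP_GAP of the first stmt becomes 4 - 3 = 1, i.e. one element
   is skipped at the end of every group. */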
2423 else
2425 if (dump_enabled_p ())
2426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2427 "interleaved store with gaps");
2428 return false;
2432 /* Check that STEP is a multiple of type size. */
2433 if (dr_step && (dr_step % type_size) != 0)
2435 if (dump_enabled_p ())
2437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2438 "step is not a multiple of type size: step ");
2439 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
2440 dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
2441 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2442 TYPE_SIZE_UNIT (scalar_type));
2444 return false;
2447 if (groupsize == 0)
2448 groupsize = count;
2450 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2451 if (dump_enabled_p ())
2452 dump_printf_loc (MSG_NOTE, vect_location,
2453 "Detected interleaving of size %d", (int)groupsize);
2455 /* SLP: create an SLP data structure for every interleaving group of
2456 stores for further analysis in vect_analyze_slp. */
2457 if (DR_IS_WRITE (dr) && !slp_impossible)
2459 if (loop_vinfo)
2460 LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
2461 if (bb_vinfo)
2462 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
2465 /* There is a gap at the end of the group. */
2466 if (groupsize - last_accessed_element > 0 && loop_vinfo)
2468 if (dump_enabled_p ())
2469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2470 "Data access with gaps requires scalar "
2471 "epilogue loop");
2472 if (loop->inner)
2474 if (dump_enabled_p ())
2475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2476 "Peeling for outer loop is not supported");
2477 return false;
2480 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2484 return true;
2488 /* Analyze the access pattern of the data-reference DR.
2489 In case of non-consecutive accesses call vect_analyze_group_access() to
2490 analyze groups of accesses. */
2492 static bool
2493 vect_analyze_data_ref_access (struct data_reference *dr)
2495 tree step = DR_STEP (dr);
2496 tree scalar_type = TREE_TYPE (DR_REF (dr));
2497 gimple stmt = DR_STMT (dr);
2498 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2499 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2500 struct loop *loop = NULL;
2502 if (loop_vinfo)
2503 loop = LOOP_VINFO_LOOP (loop_vinfo);
2505 if (loop_vinfo && !step)
2507 if (dump_enabled_p ())
2508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2509 "bad data-ref access in loop");
2510 return false;
2513 /* Allow invariant loads in loops. */
2514 if (loop_vinfo && integer_zerop (step))
2516 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2517 return DR_IS_READ (dr);
2520 if (loop && nested_in_vect_loop_p (loop, stmt))
2522 /* Interleaved accesses are not yet supported within outer-loop
2523 vectorization for references in the inner-loop. */
2524 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2526 /* For the rest of the analysis we use the outer-loop step. */
2527 step = STMT_VINFO_DR_STEP (stmt_info);
2528 if (integer_zerop (step))
2530 if (dump_enabled_p ())
2531 dump_printf_loc (MSG_NOTE, vect_location,
2532 "zero step in outer loop.");
2533 if (DR_IS_READ (dr))
2534 return true;
2535 else
2536 return false;
2540 /* Consecutive? */
2541 if (TREE_CODE (step) == INTEGER_CST)
2543 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2544 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2545 || (dr_step < 0
2546 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2548 /* Mark that it is not interleaving. */
2549 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2550 return true;
2554 if (loop && nested_in_vect_loop_p (loop, stmt))
2556 if (dump_enabled_p ())
2557 dump_printf_loc (MSG_NOTE, vect_location,
2558 "grouped access in outer loop.");
2559 return false;
2562 /* Assume this is a DR handled by non-constant strided load case. */
2563 if (TREE_CODE (step) != INTEGER_CST)
2564 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2566 /* Non-consecutive access - check if it is part of an interleaving group. */
2567 return vect_analyze_group_access (dr);
2571 /* Function vect_analyze_data_ref_accesses.
2573 Analyze the access pattern of all the data references in the loop.
2575 FORNOW: the only access pattern that is considered vectorizable is a
2576 simple step 1 (consecutive) access.
2578 FORNOW: handle only arrays and pointer accesses. */
2580 bool
2581 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2583 unsigned int i;
2584 vec<data_reference_p> datarefs;
2585 struct data_reference *dr;
2587 if (dump_enabled_p ())
2588 dump_printf_loc (MSG_NOTE, vect_location,
2589 "=== vect_analyze_data_ref_accesses ===");
2591 if (loop_vinfo)
2592 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2593 else
2594 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2596 FOR_EACH_VEC_ELT (datarefs, i, dr)
2597 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2598 && !vect_analyze_data_ref_access (dr))
2600 if (dump_enabled_p ())
2601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2602 "not vectorized: complicated access pattern.");
2604 if (bb_vinfo)
2606 /* Mark the statement as not vectorizable. */
2607 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2608 continue;
2610 else
2611 return false;
2614 return true;
2617 /* Function vect_prune_runtime_alias_test_list.
2619 Prune a list of ddrs to be tested at run-time by versioning for alias.
2620 Return FALSE if the resulting list of ddrs is longer than allowed by
2621 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2623 bool
2624 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2626 vec<ddr_p> ddrs =
2627 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2628 unsigned i, j;
2630 if (dump_enabled_p ())
2631 dump_printf_loc (MSG_NOTE, vect_location,
2632 "=== vect_prune_runtime_alias_test_list ===");
2634 for (i = 0; i < ddrs.length (); )
2636 bool found;
2637 ddr_p ddr_i;
2639 ddr_i = ddrs[i];
2640 found = false;
2642 for (j = 0; j < i; j++)
2644 ddr_p ddr_j = ddrs[j];
2646 if (vect_vfa_range_equal (ddr_i, ddr_j))
2648 if (dump_enabled_p ())
2650 dump_printf_loc (MSG_NOTE, vect_location,
2651 "found equal ranges ");
2652 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr_i)));
2653 dump_printf (MSG_NOTE, ", ");
2654 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr_i)));
2655 dump_printf (MSG_NOTE, " and ");
2656 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr_j)));
2657 dump_printf (MSG_NOTE, ", ");
2658 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr_j)));
2660 found = true;
2661 break;
2665 if (found)
2667 ddrs.ordered_remove (i);
2668 continue;
2670 i++;
2673 if (ddrs.length () >
2674 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2676 if (dump_enabled_p ())
2678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2679 "disable versioning for alias - max number of "
2680 "generated checks exceeded.");
2683 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).truncate (0);
2685 return false;
2688 return true;
2691 /* Check whether a non-affine read in STMT is suitable for a gather load
2692 and if so, return a builtin decl for that operation. */
2694 tree
2695 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2696 tree *offp, int *scalep)
2698 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2699 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2700 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2701 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2702 tree offtype = NULL_TREE;
2703 tree decl, base, off;
2704 enum machine_mode pmode;
2705 int punsignedp, pvolatilep;
2707 /* The gather builtins need an address of the form
2708 loop_invariant + vector * {1, 2, 4, 8}
2709 or
2710 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2711 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2712 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2713 multiplications and additions in it. To get a vector, we need
2714 a single SSA_NAME that will be defined in the loop and will
2715 contain everything that is not loop invariant and that can be
2716 vectorized. The following code attempts to find such a preexisting
2717 SSA_NAME OFF and put the loop invariants into a tree BASE
2718 that can be gimplified before the loop. */
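/* Illustrative example (assumed source, not from this file): for a loop
   reading b[c[i]] with int b[] and c[], DR_REF is b[c[i]]; the
   decomposition below ends up with BASE = &b (plus any constant offset),
   OFF = the SSA_NAME holding c[i] (possibly sign-extended), and
   SCALE = 4. */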
2719 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2720 &pmode, &punsignedp, &pvolatilep, false);
2721 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2723 if (TREE_CODE (base) == MEM_REF)
2725 if (!integer_zerop (TREE_OPERAND (base, 1)))
2727 if (off == NULL_TREE)
2729 double_int moff = mem_ref_offset (base);
2730 off = double_int_to_tree (sizetype, moff);
2732 else
2733 off = size_binop (PLUS_EXPR, off,
2734 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2736 base = TREE_OPERAND (base, 0);
2738 else
2739 base = build_fold_addr_expr (base);
2741 if (off == NULL_TREE)
2742 off = size_zero_node;
2744 /* If base is not loop invariant, then, if off is 0, we start with just
2745 the constant offset in the loop invariant BASE and continue with base
2746 as OFF; otherwise we give up.
2747 We could handle that case by gimplifying the addition of base + off
2748 into some SSA_NAME and use that as off, but for now punt. */
2749 if (!expr_invariant_in_loop_p (loop, base))
2751 if (!integer_zerop (off))
2752 return NULL_TREE;
2753 off = base;
2754 base = size_int (pbitpos / BITS_PER_UNIT);
2756 /* Otherwise put base + constant offset into the loop invariant BASE
2757 and continue with OFF. */
2758 else
2760 base = fold_convert (sizetype, base);
2761 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2764 /* OFF at this point may be either a SSA_NAME or some tree expression
2765 from get_inner_reference. Try to peel off loop invariants from it
2766 into BASE as long as possible. */
2767 STRIP_NOPS (off);
2768 while (offtype == NULL_TREE)
2770 enum tree_code code;
2771 tree op0, op1, add = NULL_TREE;
2773 if (TREE_CODE (off) == SSA_NAME)
2775 gimple def_stmt = SSA_NAME_DEF_STMT (off);
2777 if (expr_invariant_in_loop_p (loop, off))
2778 return NULL_TREE;
2780 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2781 break;
2783 op0 = gimple_assign_rhs1 (def_stmt);
2784 code = gimple_assign_rhs_code (def_stmt);
2785 op1 = gimple_assign_rhs2 (def_stmt);
2787 else
2789 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2790 return NULL_TREE;
2791 code = TREE_CODE (off);
2792 extract_ops_from_tree (off, &code, &op0, &op1);
2794 switch (code)
2796 case POINTER_PLUS_EXPR:
2797 case PLUS_EXPR:
2798 if (expr_invariant_in_loop_p (loop, op0))
2800 add = op0;
2801 off = op1;
2802 do_add:
2803 add = fold_convert (sizetype, add);
2804 if (scale != 1)
2805 add = size_binop (MULT_EXPR, add, size_int (scale));
2806 base = size_binop (PLUS_EXPR, base, add);
2807 continue;
2809 if (expr_invariant_in_loop_p (loop, op1))
2811 add = op1;
2812 off = op0;
2813 goto do_add;
2815 break;
2816 case MINUS_EXPR:
2817 if (expr_invariant_in_loop_p (loop, op1))
2819 add = fold_convert (sizetype, op1);
2820 add = size_binop (MINUS_EXPR, size_zero_node, add);
2821 off = op0;
2822 goto do_add;
2824 break;
2825 case MULT_EXPR:
2826 if (scale == 1 && host_integerp (op1, 0))
2828 scale = tree_low_cst (op1, 0);
2829 off = op0;
2830 continue;
2832 break;
2833 case SSA_NAME:
2834 off = op0;
2835 continue;
2836 CASE_CONVERT:
2837 if (!POINTER_TYPE_P (TREE_TYPE (op0))
2838 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2839 break;
2840 if (TYPE_PRECISION (TREE_TYPE (op0))
2841 == TYPE_PRECISION (TREE_TYPE (off)))
2843 off = op0;
2844 continue;
2846 if (TYPE_PRECISION (TREE_TYPE (op0))
2847 < TYPE_PRECISION (TREE_TYPE (off)))
2849 off = op0;
2850 offtype = TREE_TYPE (off);
2851 STRIP_NOPS (off);
2852 continue;
2854 break;
2855 default:
2856 break;
2858 break;
2861 /* If at the end OFF still isn't a SSA_NAME or isn't
2862 defined in the loop, punt. */
2863 if (TREE_CODE (off) != SSA_NAME
2864 || expr_invariant_in_loop_p (loop, off))
2865 return NULL_TREE;
2867 if (offtype == NULL_TREE)
2868 offtype = TREE_TYPE (off);
2870 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2871 offtype, scale);
2872 if (decl == NULL_TREE)
2873 return NULL_TREE;
2875 if (basep)
2876 *basep = base;
2877 if (offp)
2878 *offp = off;
2879 if (scalep)
2880 *scalep = scale;
2881 return decl;
2884 /* Check whether a non-affine load in STMT (being in the loop referred to
2885 in LOOP_VINFO) is suitable for handling as a strided load. That is the case
2886 if its address is a simple induction variable. If so return the base
2887 of that induction variable in *BASEP and the (loop-invariant) step
2888 in *STEPP, both only when that pointer is non-zero.
2890 This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
2891 base pointer) only. */
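/* E.g. (illustrative): for a read a[k * stride] whose index k * stride is
   an induction variable of the containing loop (with loop-invariant
   stride), OFF is that SSA_NAME and the returned base/step are the base
   and step of its affine evolution. */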
2893 bool
2894 vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2895 tree *stepp)
2897 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2898 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2899 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2900 tree base, off;
2901 affine_iv iv;
2903 if (!DR_IS_READ (dr))
2904 return false;
2906 base = DR_REF (dr);
2908 if (TREE_CODE (base) == ARRAY_REF)
2910 off = TREE_OPERAND (base, 1);
2911 base = TREE_OPERAND (base, 0);
2913 else if (TREE_CODE (base) == MEM_REF)
2915 off = TREE_OPERAND (base, 0);
2916 base = TREE_OPERAND (base, 1);
2918 else
2919 return false;
2921 if (TREE_CODE (off) != SSA_NAME)
2922 return false;
2924 if (!expr_invariant_in_loop_p (loop, base)
2925 || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
2926 return false;
2928 if (basep)
2929 *basep = iv.base;
2930 if (stepp)
2931 *stepp = iv.step;
2932 return true;
2935 /* Function vect_analyze_data_refs.
2937 Find all the data references in the loop or basic block.
2939 The general structure of the analysis of data refs in the vectorizer is as
2940 follows:
2941 1- vect_analyze_data_refs(loop/bb): call
2942 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2943 in the loop/bb and their dependences.
2944 2- vect_analyze_data_ref_dependences(): apply dependence testing using ddrs.
2945 3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
2946 4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.
2950 bool
2951 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2952 bb_vec_info bb_vinfo,
2953 int *min_vf)
2955 struct loop *loop = NULL;
2956 basic_block bb = NULL;
2957 unsigned int i;
2958 vec<data_reference_p> datarefs;
2959 struct data_reference *dr;
2960 tree scalar_type;
2961 bool res, stop_bb_analysis = false;
2963 if (dump_enabled_p ())
2964 dump_printf_loc (MSG_NOTE, vect_location,
2965 "=== vect_analyze_data_refs ===\n");
2967 if (loop_vinfo)
2969 loop = LOOP_VINFO_LOOP (loop_vinfo);
2970 res = compute_data_dependences_for_loop
2971 (loop, true,
2972 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2973 &LOOP_VINFO_DATAREFS (loop_vinfo),
2974 &LOOP_VINFO_DDRS (loop_vinfo));
2976 if (!res)
2978 if (dump_enabled_p ())
2979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2980 "not vectorized: loop contains function calls"
2981 " or data references that cannot be analyzed");
2982 return false;
2985 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2987 else
2989 gimple_stmt_iterator gsi;
2991 bb = BB_VINFO_BB (bb_vinfo);
2992 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2994 gimple stmt = gsi_stmt (gsi);
2995 if (!find_data_references_in_stmt (NULL, stmt,
2996 &BB_VINFO_DATAREFS (bb_vinfo)))
2998 /* Mark the rest of the basic-block as unvectorizable. */
2999 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3001 stmt = gsi_stmt (gsi);
3002 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
3004 break;
3007 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
3008 &BB_VINFO_DDRS (bb_vinfo),
3009 vNULL, true))
3011 if (dump_enabled_p ())
3012 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3013 "not vectorized: basic block contains function"
3014 " calls or data references that cannot be"
3015 " analyzed");
3016 return false;
3019 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
3022 /* Go through the data-refs and check that the analysis succeeded. Update
3023 the pointer from the stmt_vec_info struct to DR and vectype. */
3025 FOR_EACH_VEC_ELT (datarefs, i, dr)
3027 gimple stmt;
3028 stmt_vec_info stmt_info;
3029 tree base, offset, init;
3030 bool gather = false;
3031 int vf;
3033 if (!dr || !DR_REF (dr))
3035 if (dump_enabled_p ())
3036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3037 "not vectorized: unhandled data-ref ");
3038 return false;
3041 stmt = DR_STMT (dr);
3042 stmt_info = vinfo_for_stmt (stmt);
3044 if (stop_bb_analysis)
3046 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3047 continue;
3050 /* Check that analysis of the data-ref succeeded. */
3051 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3052 || !DR_STEP (dr))
3054 /* If the target supports vector gather loads, see if they can
3055 be used. */
3056 if (loop_vinfo
3057 && DR_IS_READ (dr)
3058 && !TREE_THIS_VOLATILE (DR_REF (dr))
3059 && targetm.vectorize.builtin_gather != NULL
3060 && !nested_in_vect_loop_p (loop, stmt))
3062 struct data_reference *newdr
3063 = create_data_ref (NULL, loop_containing_stmt (stmt),
3064 DR_REF (dr), stmt, true);
3065 gcc_assert (newdr != NULL && DR_REF (newdr));
3066 if (DR_BASE_ADDRESS (newdr)
3067 && DR_OFFSET (newdr)
3068 && DR_INIT (newdr)
3069 && DR_STEP (newdr)
3070 && integer_zerop (DR_STEP (newdr)))
3072 dr = newdr;
3073 gather = true;
3075 else
3076 free_data_ref (newdr);
3079 if (!gather)
3081 if (dump_enabled_p ())
3083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3084 "not vectorized: data ref analysis "
3085 "failed ");
3086 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3089 if (bb_vinfo)
3091 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3092 stop_bb_analysis = true;
3093 continue;
3096 return false;
3100 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3102 if (dump_enabled_p ())
3103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3104 "not vectorized: base addr of dr is a "
3105 "constant");
3107 if (bb_vinfo)
3109 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3110 stop_bb_analysis = true;
3111 continue;
3114 if (gather)
3115 free_data_ref (dr);
3116 return false;
3119 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3121 if (dump_enabled_p ())
3123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3124 "not vectorized: volatile type ");
3125 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3128 if (bb_vinfo)
3130 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3131 stop_bb_analysis = true;
3132 continue;
3135 return false;
3138 if (stmt_can_throw_internal (stmt))
3140 if (dump_enabled_p ())
3142 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3143 "not vectorized: statement can throw an "
3144 "exception ");
3145 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3148 if (bb_vinfo)
3150 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3151 stop_bb_analysis = true;
3152 continue;
3155 if (gather)
3156 free_data_ref (dr);
3157 return false;
3160 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3161 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3163 if (dump_enabled_p ())
3165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3166 "not vectorized: statement is bitfield "
3167 "access ");
3168 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3171 if (bb_vinfo)
3173 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3174 stop_bb_analysis = true;
3175 continue;
3178 if (gather)
3179 free_data_ref (dr);
3180 return false;
3183 base = unshare_expr (DR_BASE_ADDRESS (dr));
3184 offset = unshare_expr (DR_OFFSET (dr));
3185 init = unshare_expr (DR_INIT (dr));
3187 if (is_gimple_call (stmt))
3189 if (dump_enabled_p ())
3191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3192 "not vectorized: dr in a call ");
3193 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3196 if (bb_vinfo)
3198 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3199 stop_bb_analysis = true;
3200 continue;
3203 if (gather)
3204 free_data_ref (dr);
3205 return false;
3208 /* Update DR field in stmt_vec_info struct. */
3210 /* If the dataref is in an inner-loop of the loop that is considered
3211 for vectorization, we also want to analyze the access relative to
3212 the outer-loop (DR contains information only relative to the
3213 inner-most enclosing loop). We do that by building a reference to the
3214 first location accessed by the inner-loop, and analyze it relative to
3215 the outer-loop. */
3216 if (loop && nested_in_vect_loop_p (loop, stmt))
3218 tree outer_step, outer_base, outer_init;
3219 HOST_WIDE_INT pbitsize, pbitpos;
3220 tree poffset;
3221 enum machine_mode pmode;
3222 int punsignedp, pvolatilep;
3223 affine_iv base_iv, offset_iv;
3224 tree dinit;
3226 /* Build a reference to the first location accessed by the
3227 inner-loop: *(BASE+INIT). (The first location is actually
3228 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3229 tree inner_base = build_fold_indirect_ref
3230 (fold_build_pointer_plus (base, init));
3232 if (dump_enabled_p ())
3234 dump_printf_loc (MSG_NOTE, vect_location,
3235 "analyze in outer-loop: ");
3236 dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
3239 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3240 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3241 gcc_assert (outer_base != NULL_TREE);
3243 if (pbitpos % BITS_PER_UNIT != 0)
3245 if (dump_enabled_p ())
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3247 "failed: bit offset alignment.\n");
3248 return false;
3251 outer_base = build_fold_addr_expr (outer_base);
3252 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3253 &base_iv, false))
3255 if (dump_enabled_p ())
3256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3257 "failed: evolution of base is not affine.\n");
3258 return false;
3261 if (offset)
3263 if (poffset)
3264 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
3265 poffset);
3266 else
3267 poffset = offset;
3270 if (!poffset)
3272 offset_iv.base = ssize_int (0);
3273 offset_iv.step = ssize_int (0);
3275 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3276 &offset_iv, false))
3278 if (dump_enabled_p ())
3279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3280 "evolution of offset is not affine.\n");
3281 return false;
3284 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3285 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3286 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3287 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3288 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3290 outer_step = size_binop (PLUS_EXPR,
3291 fold_convert (ssizetype, base_iv.step),
3292 fold_convert (ssizetype, offset_iv.step));
3294 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3295 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3296 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3297 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3298 STMT_VINFO_DR_OFFSET (stmt_info) =
3299 fold_convert (ssizetype, offset_iv.base);
3300 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3301 size_int (highest_pow2_factor (offset_iv.base));
3303 if (dump_enabled_p ())
3305 dump_printf_loc (MSG_NOTE, vect_location,
3306 "\touter base_address: ");
3307 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3308 STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3309 dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
3310 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3311 STMT_VINFO_DR_OFFSET (stmt_info));
3312 dump_printf (MSG_NOTE,
3313 "\n\touter constant offset from base address: ");
3314 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3315 STMT_VINFO_DR_INIT (stmt_info));
3316 dump_printf (MSG_NOTE, "\n\touter step: ");
3317 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3318 STMT_VINFO_DR_STEP (stmt_info));
3319 dump_printf (MSG_NOTE, "\n\touter aligned to: ");
3320 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3321 STMT_VINFO_DR_ALIGNED_TO (stmt_info));
3325 if (STMT_VINFO_DATA_REF (stmt_info))
3327 if (dump_enabled_p ())
3329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3330 "not vectorized: more than one data ref "
3331 "in stmt: ");
3332 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3335 if (bb_vinfo)
3337 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3338 stop_bb_analysis = true;
3339 continue;
3342 if (gather)
3343 free_data_ref (dr);
3344 return false;
3347 STMT_VINFO_DATA_REF (stmt_info) = dr;
3349 /* Set vectype for STMT. */
3350 scalar_type = TREE_TYPE (DR_REF (dr));
3351 STMT_VINFO_VECTYPE (stmt_info) =
3352 get_vectype_for_scalar_type (scalar_type);
3353 if (!STMT_VINFO_VECTYPE (stmt_info))
3355 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3358 "not vectorized: no vectype for stmt: ");
3359 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3360 dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
3361 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
3362 scalar_type);
3365 if (bb_vinfo)
3367 /* Mark the statement as not vectorizable. */
3368 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3369 stop_bb_analysis = true;
3370 continue;
3373 if (gather)
3375 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3376 free_data_ref (dr);
3378 return false;
3381 /* Adjust the minimal vectorization factor according to the
3382 vector type. */
3383 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3384 if (vf > *min_vf)
3385 *min_vf = vf;
3387 if (gather)
3389 unsigned int j, k, n;
3390 struct data_reference *olddr
3391 = datarefs[i];
3392 vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3393 struct data_dependence_relation *ddr, *newddr;
3394 bool bad = false;
3395 tree off;
3396 vec<loop_p> nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3398 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3399 if (gather
3400 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3401 gather = false;
3402 if (!gather)
3404 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3405 free_data_ref (dr);
3406 if (dump_enabled_p ())
3408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3409 "not vectorized: not suitable for gather "
3410 "load ");
3411 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3413 return false;
3416 n = datarefs.length () - 1;
3417 for (j = 0, k = i - 1; j < i; j++)
3419 ddr = ddrs[k];
3420 gcc_assert (DDR_B (ddr) == olddr);
3421 newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3422 nest);
3423 ddrs[k] = newddr;
3424 free_dependence_relation (ddr);
3425 if (!bad
3426 && DR_IS_WRITE (DDR_A (newddr))
3427 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3428 bad = true;
3429 k += --n;
3432 k++;
3433 n = k + datarefs.length () - i - 1;
3434 for (; k < n; k++)
3436 ddr = ddrs[k];
3437 gcc_assert (DDR_A (ddr) == olddr);
3438 newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3439 nest);
3440 ddrs[k] = newddr;
3441 free_dependence_relation (ddr);
3442 if (!bad
3443 && DR_IS_WRITE (DDR_B (newddr))
3444 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3445 bad = true;
3448 k = ddrs.length ()
3449 - datarefs.length () + i;
3450 ddr = ddrs[k];
3451 gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3452 newddr = initialize_data_dependence_relation (dr, dr, nest);
3453 ddrs[k] = newddr;
3454 free_dependence_relation (ddr);
3455 datarefs[i] = dr;
3457 if (bad)
3459 if (dump_enabled_p ())
3461 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3462 "not vectorized: data dependence conflict"
3463 " prevents gather load");
3464 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3466 return false;
3469 STMT_VINFO_GATHER_P (stmt_info) = true;
3471 else if (loop_vinfo
3472 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3474 bool strided_load = false;
3475 if (!nested_in_vect_loop_p (loop, stmt))
3476 strided_load
3477 = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
3478 if (!strided_load)
3480 if (dump_enabled_p ())
3482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3483 "not vectorized: not suitable for strided "
3484 "load ");
3485 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3487 return false;
3489 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
3493 return true;
3497 /* Function vect_get_new_vect_var.
3499 Returns a name for a new variable. The current naming scheme prepends
3500 the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
3501 VAR_KIND) to NAME when it is provided, and uses the prefix alone
3502 otherwise. */
3504 tree
3505 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3507 const char *prefix;
3508 tree new_vect_var;
3510 switch (var_kind)
3512 case vect_simple_var:
3513 prefix = "vect_";
3514 break;
3515 case vect_scalar_var:
3516 prefix = "stmp_";
3517 break;
3518 case vect_pointer_var:
3519 prefix = "vect_p";
3520 break;
3521 default:
3522 gcc_unreachable ();
3525 if (name)
3527 char* tmp = concat (prefix, name, NULL);
3528 new_vect_var = create_tmp_reg (type, tmp);
3529 free (tmp);
3531 else
3532 new_vect_var = create_tmp_reg (type, prefix);
3534 return new_vect_var;
3538 /* Function vect_create_addr_base_for_vector_ref.
3540 Create an expression that computes the address of the first memory location
3541 that will be accessed for a data reference.
3543 Input:
3544 STMT: The statement containing the data reference.
3545 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3546 OFFSET: Optional. If supplied, it is added to the initial address.
3547 LOOP: Specify relative to which loop-nest the address should be computed.
3548 For example, when the dataref is in an inner-loop nested in an
3549 outer-loop that is now being vectorized, LOOP can be either the
3550 outer-loop, or the inner-loop. The first memory location accessed
3551 by the following dataref ('in' points to short):
3553 for (i=0; i<N; i++)
3554 for (j=0; j<M; j++)
3555 s += in[i+j]
3557 is as follows:
3558 if LOOP=i_loop: &in (relative to i_loop)
3559 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3561 Output:
3562 1. Return an SSA_NAME whose value is the address of the memory location of
3563 the first vector of the data reference.
3564 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3565 these statement(s) which define the returned SSA_NAME.
3567 FORNOW: We are only handling array accesses with step 1. */
3569 tree
3570 vect_create_addr_base_for_vector_ref (gimple stmt,
3571 gimple_seq *new_stmt_list,
3572 tree offset,
3573 struct loop *loop)
3575 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3576 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3577 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3578 const char *base_name;
3579 tree data_ref_base_var;
3580 tree vec_stmt;
3581 tree addr_base, addr_expr;
3582 tree dest;
3583 gimple_seq seq = NULL;
3584 tree base_offset = unshare_expr (DR_OFFSET (dr));
3585 tree init = unshare_expr (DR_INIT (dr));
3586 tree vect_ptr_type;
3587 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3588 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3589 tree base;
3591 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3593 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3595 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3597 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3598 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3599 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3602 if (loop_vinfo)
3603 base_name = get_name (data_ref_base);
3604 else
3606 base_offset = ssize_int (0);
3607 init = ssize_int (0);
3608 base_name = get_name (DR_REF (dr));
3611 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3612 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3613 data_ref_base_var);
3614 gimple_seq_add_seq (new_stmt_list, seq);
3616 /* Create base_offset */
3617 base_offset = size_binop (PLUS_EXPR,
3618 fold_convert (sizetype, base_offset),
3619 fold_convert (sizetype, init));
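/* E.g. (illustrative values): with DR_OFFSET 0 and DR_INIT 8, base_offset
   becomes 8 and the address built below is data_ref_base + 8 (plus
   OFFSET * step when OFFSET is supplied). */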
3620 dest = create_tmp_var (sizetype, "base_off");
3621 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3622 gimple_seq_add_seq (new_stmt_list, seq);
3624 if (offset)
3626 tree tmp = create_tmp_var (sizetype, "offset");
3628 offset = fold_build2 (MULT_EXPR, sizetype,
3629 fold_convert (sizetype, offset), step);
3630 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3631 base_offset, offset);
3632 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3633 gimple_seq_add_seq (new_stmt_list, seq);
3636 /* base + base_offset */
3637 if (loop_vinfo)
3638 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3639 else
3641 addr_base = build1 (ADDR_EXPR,
3642 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3643 unshare_expr (DR_REF (dr)));
3646 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3647 base = get_base_address (DR_REF (dr));
3648 if (base
3649 && TREE_CODE (base) == MEM_REF)
3650 vect_ptr_type
3651 = build_qualified_type (vect_ptr_type,
3652 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3654 vec_stmt = fold_convert (vect_ptr_type, addr_base);
3655 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3656 base_name);
3657 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3658 gimple_seq_add_seq (new_stmt_list, seq);
3660 if (DR_PTR_INFO (dr)
3661 && TREE_CODE (vec_stmt) == SSA_NAME)
3663 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3664 if (offset)
3665 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (vec_stmt));
3668 if (dump_enabled_p ())
3670 dump_printf_loc (MSG_NOTE, vect_location, "created ");
3671 dump_generic_expr (MSG_NOTE, TDF_SLIM, vec_stmt);
3674 return vec_stmt;
3678 /* Function vect_create_data_ref_ptr.
3680 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3681 location accessed in the loop by STMT, along with the def-use update
3682 chain to appropriately advance the pointer through the loop iterations.
3683 Also set aliasing information for the pointer. This pointer is used by
3684 the callers to this function to create a memory reference expression for
3685 vector load/store access.
3687 Input:
3688 1. STMT: a stmt that references memory. Expected to be of the form
3689 GIMPLE_ASSIGN <name, data-ref> or
3690 GIMPLE_ASSIGN <data-ref, name>.
3691 2. AGGR_TYPE: the type of the reference, which should be either a vector
3692 or an array.
3693 3. AT_LOOP: the loop where the vector memref is to be created.
3694 4. OFFSET (optional): an offset to be added to the initial address accessed
3695 by the data-ref in STMT.
3696 5. BSI: location where the new stmts are to be placed if there is no loop
3697 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3698 pointing to the initial address.
3700 Output:
3701 1. Declare a new ptr to vector_type, and have it point to the base of the
3702 data reference (initial address accessed by the data reference).
3703 For example, for vector of type V8HI, the following code is generated:
3705 v8hi *ap;
3706 ap = (v8hi *)initial_address;
3708 if OFFSET is not supplied:
3709 initial_address = &a[init];
3710 if OFFSET is supplied:
3711 initial_address = &a[init + OFFSET];
3713 Return the initial_address in INITIAL_ADDRESS.
3715 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3716 update the pointer in each iteration of the loop.
3718 Return the increment stmt that updates the pointer in PTR_INCR.
3720 3. Set INV_P to true if the access pattern of the data reference in the
3721 vectorized loop is invariant. Set it to false otherwise.
3723 4. Return the pointer. */
3725 tree
3726 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3727 tree offset, tree *initial_address,
3728 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3729 bool only_init, bool *inv_p)
3731 const char *base_name;
3732 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3733 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3734 struct loop *loop = NULL;
3735 bool nested_in_vect_loop = false;
3736 struct loop *containing_loop = NULL;
3737 tree aggr_ptr_type;
3738 tree aggr_ptr;
3739 tree new_temp;
3740 gimple vec_stmt;
3741 gimple_seq new_stmt_list = NULL;
3742 edge pe = NULL;
3743 basic_block new_bb;
3744 tree aggr_ptr_init;
3745 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3746 tree aptr;
3747 gimple_stmt_iterator incr_gsi;
3748 bool insert_after;
3749 bool negative;
3750 tree indx_before_incr, indx_after_incr;
3751 gimple incr;
3752 tree step;
3753 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3754 tree base;
3756 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3757 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3759 if (loop_vinfo)
3761 loop = LOOP_VINFO_LOOP (loop_vinfo);
3762 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3763 containing_loop = (gimple_bb (stmt))->loop_father;
3764 pe = loop_preheader_edge (loop);
3766 else
3768 gcc_assert (bb_vinfo);
3769 only_init = true;
3770 *ptr_incr = NULL;
3773 /* Check the step (evolution) of the load in LOOP, and record
3774 whether it's invariant. */
3775 if (nested_in_vect_loop)
3776 step = STMT_VINFO_DR_STEP (stmt_info);
3777 else
3778 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3780 if (tree_int_cst_compare (step, size_zero_node) == 0)
3781 *inv_p = true;
3782 else
3783 *inv_p = false;
3784 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3786 /* Create an expression for the first address accessed by this load
3787 in LOOP. */
3788 base_name = get_name (DR_BASE_ADDRESS (dr));
3790 if (dump_enabled_p ())
3792 tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
3793 dump_printf_loc (MSG_NOTE, vect_location,
3794 "create %s-pointer variable to type: ",
3795 tree_code_name[(int) TREE_CODE (aggr_type)]);
3796 dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
3797 if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
3798 dump_printf (MSG_NOTE, " vectorizing an array ref: ");
3799 else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
3800 dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
3801 else
3802 dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
3803 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
3806 /* (1) Create the new aggregate-pointer variable. */
3807 aggr_ptr_type = build_pointer_type (aggr_type);
3808 base = get_base_address (DR_REF (dr));
3809 if (base
3810 && TREE_CODE (base) == MEM_REF)
3811 aggr_ptr_type
3812 = build_qualified_type (aggr_ptr_type,
3813 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3814 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
3816 /* Vector and array types inherit the alias set of their component
3817 type by default, so we need to use a ref-all pointer if the data
3818 reference does not conflict with the created aggregate data
3819 reference because it is not addressable. */
3820 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3821 get_alias_set (DR_REF (dr))))
3823 aggr_ptr_type
3824 = build_pointer_type_for_mode (aggr_type,
3825 TYPE_MODE (aggr_ptr_type), true);
3826 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3827 base_name);
3830 /* Likewise for any of the data references in the stmt group. */
3831 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3833 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3836 tree lhs = gimple_assign_lhs (orig_stmt);
3837 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3838 get_alias_set (lhs)))
3840 aggr_ptr_type
3841 = build_pointer_type_for_mode (aggr_type,
3842 TYPE_MODE (aggr_ptr_type), true);
3843 aggr_ptr
3844 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3845 base_name);
3846 break;
3849 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3851 while (orig_stmt);
3854 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3855 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3856 def-use update cycles for the pointer: one relative to the outer-loop
3857 (LOOP), which is what steps (3) and (4) below do. The other is relative
3858 to the inner-loop (which is the inner-most loop containing the dataref),
3859 and this is done by step (5) below.
3861 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3862 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3863 redundant. Steps (3),(4) create the following:
3865 vp0 = &base_addr;
3866 LOOP: vp1 = phi(vp0,vp2)
3869 vp2 = vp1 + step
3870 goto LOOP
3872 If there is an inner-loop nested in LOOP, then step (5) will also be
3873 applied, and an additional update in the inner-loop will be created:
3875 vp0 = &base_addr;
3876 LOOP: vp1 = phi(vp0,vp2)
3878 inner: vp3 = phi(vp1,vp4)
3879 vp4 = vp3 + inner_step
3880 if () goto inner
3882 vp2 = vp1 + step
3883 if () goto LOOP */
3885 /* (2) Calculate the initial address of the aggregate-pointer, and set
3886 the aggregate-pointer to point to it before the loop. */
3888 /* Create: &(base[init_val+offset]) in the loop preheader. */
3890 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3891 offset, loop);
3892 if (new_stmt_list)
3894 if (pe)
3896 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3897 gcc_assert (!new_bb);
3899 else
3900 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3903 *initial_address = new_temp;
3905 /* Create: p = (aggr_type *) initial_base */
3906 if (TREE_CODE (new_temp) != SSA_NAME
3907 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3909 vec_stmt = gimple_build_assign (aggr_ptr,
3910 fold_convert (aggr_ptr_type, new_temp));
3911 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3912 /* Copy the points-to information if it exists. */
3913 if (DR_PTR_INFO (dr))
3914 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3915 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3916 if (pe)
3918 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3919 gcc_assert (!new_bb);
3921 else
3922 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3924 else
3925 aggr_ptr_init = new_temp;
3927 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3928 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3929 inner-loop nested in LOOP (during outer-loop vectorization). */
3931 /* No update in loop is required. */
3932 if (only_init && (!loop_vinfo || at_loop == loop))
3933 aptr = aggr_ptr_init;
3934 else
3936 /* The step of the aggregate pointer is the type size. */
3937 tree step = TYPE_SIZE_UNIT (aggr_type);
3938 /* One exception to the above is when the scalar step of the load in
3939 LOOP is zero. In this case the step here is also zero. */
3940 if (*inv_p)
3941 step = size_zero_node;
3942 else if (negative)
3943 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3945 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3947 create_iv (aggr_ptr_init,
3948 fold_convert (aggr_ptr_type, step),
3949 aggr_ptr, loop, &incr_gsi, insert_after,
3950 &indx_before_incr, &indx_after_incr);
3951 incr = gsi_stmt (incr_gsi);
3952 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3954 /* Copy the points-to information if it exists. */
3955 if (DR_PTR_INFO (dr))
3957 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3958 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3960 if (ptr_incr)
3961 *ptr_incr = incr;
3963 aptr = indx_before_incr;
3966 if (!nested_in_vect_loop || only_init)
3967 return aptr;
3970 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3971 nested in LOOP, if it exists. */
3973 gcc_assert (nested_in_vect_loop);
3974 if (!only_init)
3976 standard_iv_increment_position (containing_loop, &incr_gsi,
3977 &insert_after);
3978 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3979 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3980 &indx_after_incr);
3981 incr = gsi_stmt (incr_gsi);
3982 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3984 /* Copy the points-to information if it exists. */
3985 if (DR_PTR_INFO (dr))
3987 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3988 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3990 if (ptr_incr)
3991 *ptr_incr = incr;
3993 return indx_before_incr;
3995 else
3996 gcc_unreachable ();
4000 /* Function bump_vector_ptr
4002 Increment a pointer (to a vector type) by vector-size. If requested,
4003 i.e. if PTR_INCR is given, then also connect the new increment stmt
4004 to the existing def-use update-chain of the pointer, by modifying
4005 the PTR_INCR as illustrated below:
4007 The pointer def-use update-chain before this function:
4008 DATAREF_PTR = phi (p_0, p_2)
4009 ....
4010 PTR_INCR: p_2 = DATAREF_PTR + step
4012 The pointer def-use update-chain after this function:
4013 DATAREF_PTR = phi (p_0, p_2)
4014 ....
4015 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4016 ....
4017 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
4019 Input:
4020 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
4021 in the loop.
4022 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
4023 the loop. The increment amount across iterations is expected
4024 to be vector_size.
4025 BSI - location where the new update stmt is to be placed.
4026 STMT - the original scalar memory-access stmt that is being vectorized.
4027 BUMP - optional. The offset by which to bump the pointer. If not given,
4028 the offset is assumed to be vector_size.
4030 Output: Return NEW_DATAREF_PTR as illustrated above.
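   A minimal usage sketch (illustrative only; DATAREF_PTR, PTR_INCR, GSI and
   STMT are assumed to be the caller's locals, as described above):

     dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                    stmt, NULL_TREE);

   Passing NULL_TREE as BUMP advances the pointer by the default
   vector_size.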
4034 tree
4035 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
4036 gimple stmt, tree bump)
4038 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4039 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4040 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4041 tree update = TYPE_SIZE_UNIT (vectype);
4042 gimple incr_stmt;
4043 ssa_op_iter iter;
4044 use_operand_p use_p;
4045 tree new_dataref_ptr;
4047 if (bump)
4048 update = bump;
4050 new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
4051 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
4052 dataref_ptr, update);
4053 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
4055 /* Copy the points-to information if it exists. */
4056 if (DR_PTR_INFO (dr))
4058 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
4059 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
4062 if (!ptr_incr)
4063 return new_dataref_ptr;
4065 /* Update the vector-pointer's cross-iteration increment. */
4066 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
4068 tree use = USE_FROM_PTR (use_p);
4070 if (use == dataref_ptr)
4071 SET_USE (use_p, new_dataref_ptr);
4072 else
4073 gcc_assert (tree_int_cst_compare (use, update) == 0);
4076 return new_dataref_ptr;
4080 /* Function vect_create_destination_var.
4082 Create a new temporary of type VECTYPE. */
4084 tree
4085 vect_create_destination_var (tree scalar_dest, tree vectype)
4087 tree vec_dest;
4088 const char *new_name;
4089 tree type;
4090 enum vect_var_kind kind;
4092 kind = vectype ? vect_simple_var : vect_scalar_var;
4093 type = vectype ? vectype : TREE_TYPE (scalar_dest);
4095 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4097 new_name = get_name (scalar_dest);
4098 if (!new_name)
4099 new_name = "var_";
4100 vec_dest = vect_get_new_vect_var (type, kind, new_name);
4102 return vec_dest;
4105 /* Function vect_grouped_store_supported.
4107 Returns TRUE if interleave high and interleave low permutations
4108 are supported, and FALSE otherwise. */
4110 bool
4111 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
4113 enum machine_mode mode = TYPE_MODE (vectype);
4115 /* vect_permute_store_chain requires the group size to be a power of two. */
4116 if (exact_log2 (count) == -1)
4118 if (dump_enabled_p ())
4119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4120 "the size of the group of accesses"
4121 " is not a power of 2");
4122 return false;
4125 /* Check that the permutation is supported. */
4126 if (VECTOR_MODE_P (mode))
4128 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4129 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4130 for (i = 0; i < nelt / 2; i++)
4132 sel[i * 2] = i;
4133 sel[i * 2 + 1] = i + nelt;
4135 if (can_vec_perm_p (mode, false, sel))
4137 for (i = 0; i < nelt; i++)
4138 sel[i] += nelt / 2;
4139 if (can_vec_perm_p (mode, false, sel))
4140 return true;
4144 if (dump_enabled_p ())
4145 dump_printf (MSG_MISSED_OPTIMIZATION,
4146 "interleave op not supported by target.");
4147 return false;
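/* As a worked instance of the checks above (assuming a vector mode with
   nelt == 4): the first selector built is the interleave-high mask
   {0, 4, 1, 5}; adding nelt/2 to every element then gives the
   interleave-low mask {2, 6, 3, 7}.  Both must be accepted by
   can_vec_perm_p for grouped stores to be vectorizable.  */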
4151 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
4152 type VECTYPE. */
4154 bool
4155 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4157 return vect_lanes_optab_supported_p ("vec_store_lanes",
4158 vec_store_lanes_optab,
4159 vectype, count);
4163 /* Function vect_permute_store_chain.
4165 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4166 a power of 2, generate interleave_high/low stmts to reorder the data
4167 correctly for the stores. Return the final references for stores in
4168 RESULT_CHAIN.
4170 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4171 The input is 4 vectors each containing 8 elements. We assign a number to
4172 each element; the input sequence is:
4174 1st vec: 0 1 2 3 4 5 6 7
4175 2nd vec: 8 9 10 11 12 13 14 15
4176 3rd vec: 16 17 18 19 20 21 22 23
4177 4th vec: 24 25 26 27 28 29 30 31
4179 The output sequence should be:
4181 1st vec: 0 8 16 24 1 9 17 25
4182 2nd vec: 2 10 18 26 3 11 19 27
4183 3rd vec: 4 12 20 28 5 13 21 29
4184 4th vec: 6 14 22 30 7 15 23 31
4186 i.e., we interleave the contents of the four vectors in their order.
4188 We use interleave_high/low instructions to create such output. The input of
4189 each interleave_high/low operation is two vectors:
4190 1st vec 2nd vec
4191 0 1 2 3 4 5 6 7
4192 the even elements of the result vector are obtained left-to-right from the
4193 high/low elements of the first vector. The odd elements of the result are
4194 obtained left-to-right from the high/low elements of the second vector.
4195 The output of interleave_high will be: 0 4 1 5
4196 and of interleave_low: 2 6 3 7
4199 The permutation is done in log LENGTH stages. In each stage interleave_high
4200 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4201 where the first argument is taken from the first half of DR_CHAIN and the
4202 second argument from its second half.
4203 In our example,
4205 I1: interleave_high (1st vec, 3rd vec)
4206 I2: interleave_low (1st vec, 3rd vec)
4207 I3: interleave_high (2nd vec, 4th vec)
4208 I4: interleave_low (2nd vec, 4th vec)
4210 The output for the first stage is:
4212 I1: 0 16 1 17 2 18 3 19
4213 I2: 4 20 5 21 6 22 7 23
4214 I3: 8 24 9 25 10 26 11 27
4215 I4: 12 28 13 29 14 30 15 31
4217 The output of the second stage, i.e. the final result is:
4219 I1: 0 8 16 24 1 9 17 25
4220 I2: 2 10 18 26 3 11 19 27
4221 I3: 4 12 20 28 5 13 21 29
4222 I4: 6 14 22 30 7 15 23 31. */
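/* Concretely, for the 8-element vectors in the example above, the two
   VEC_PERM_EXPR selectors built below are

     perm_mask_high = {0, 8, 1, 9, 2, 10, 3, 11}
     perm_mask_low  = {4, 12, 5, 13, 6, 14, 7, 15}

   where indices 0..7 pick elements of VECT1 and 8..15 pick elements of
   VECT2.  */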
4224 void
4225 vect_permute_store_chain (vec<tree> dr_chain,
4226 unsigned int length,
4227 gimple stmt,
4228 gimple_stmt_iterator *gsi,
4229 vec<tree> *result_chain)
4231 tree vect1, vect2, high, low;
4232 gimple perm_stmt;
4233 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4234 tree perm_mask_low, perm_mask_high;
4235 unsigned int i, n;
4236 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
4237 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4239 *result_chain = dr_chain.copy ();
4241 for (i = 0, n = nelt / 2; i < n; i++)
4243 sel[i * 2] = i;
4244 sel[i * 2 + 1] = i + nelt;
4246 perm_mask_high = vect_gen_perm_mask (vectype, sel);
4247 gcc_assert (perm_mask_high != NULL);
4249 for (i = 0; i < nelt; i++)
4250 sel[i] += nelt / 2;
4251 perm_mask_low = vect_gen_perm_mask (vectype, sel);
4252 gcc_assert (perm_mask_low != NULL);
4254 for (i = 0, n = exact_log2 (length); i < n; i++)
4256 for (j = 0; j < length/2; j++)
4258 vect1 = dr_chain[j];
4259 vect2 = dr_chain[j+length/2];
4261 /* Create interleaving stmt:
4262 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
4263 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
4264 perm_stmt
4265 = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
4266 vect1, vect2, perm_mask_high);
4267 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4268 (*result_chain)[2*j] = high;
4270 /* Create interleaving stmt:
4271 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4272 nelt*3/2+1, ...}> */
4273 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
4274 perm_stmt
4275 = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
4276 vect1, vect2, perm_mask_low);
4277 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4278 (*result_chain)[2*j+1] = low;
4280 dr_chain = result_chain->copy ();
4284 /* Function vect_setup_realignment
4286 This function is called when vectorizing an unaligned load using
4287 the dr_explicit_realign[_optimized] scheme.
4288 This function generates the following code at the loop prolog:
4290 p = initial_addr;
4291 x msq_init = *(floor(p)); # prolog load
4292 realignment_token = call target_builtin;
4293 loop:
4294 x msq = phi (msq_init, ---)
4296 The stmts marked with x are generated only for the case of
4297 dr_explicit_realign_optimized.
4299 The code above sets up a new (vector) pointer, pointing to the first
4300 location accessed by STMT, and a "floor-aligned" load using that pointer.
4301 It also generates code to compute the "realignment-token" (if the relevant
4302 target hook was defined), and creates a phi-node at the loop-header bb
4303 whose arguments are the result of the prolog-load (created by this
4304 function) and the result of a load that takes place in the loop (to be
4305 created by the caller to this function).
4307 For the case of dr_explicit_realign_optimized:
4308 The caller to this function uses the phi-result (msq) to create the
4309 realignment code inside the loop, and sets up the missing phi argument,
4310 as follows:
4311 loop:
4312 msq = phi (msq_init, lsq)
4313 lsq = *(floor(p')); # load in loop
4314 result = realign_load (msq, lsq, realignment_token);
4316 For the case of dr_explicit_realign:
4317 loop:
4318 msq = *(floor(p)); # load in loop
4319 p' = p + (VS-1);
4320 lsq = *(floor(p')); # load in loop
4321 result = realign_load (msq, lsq, realignment_token);
4323 Input:
4324 STMT - (scalar) load stmt to be vectorized. This load accesses
4325 a memory location that may be unaligned.
4326 BSI - place where new code is to be inserted.
4327 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4328 is used.
4330 Output:
4331 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4332 target hook, if defined.
4333 Return value - the result of the loop-header phi node. */
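/* A minimal caller-side sketch (illustrative only; STMT and GSI are the
   caller's load statement and insertion point):

     tree realignment_token, msq;
     struct loop *at_loop;
     msq = vect_setup_realignment (stmt, gsi, &realignment_token,
                                   dr_explicit_realign_optimized,
                                   NULL_TREE, &at_loop);

   MSQ and REALIGNMENT_TOKEN then feed the REALIGN_LOAD sequence shown in
   the scheme above.  */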
4335 tree
4336 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4337 tree *realignment_token,
4338 enum dr_alignment_support alignment_support_scheme,
4339 tree init_addr,
4340 struct loop **at_loop)
4342 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4343 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4344 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4345 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4346 struct loop *loop = NULL;
4347 edge pe = NULL;
4348 tree scalar_dest = gimple_assign_lhs (stmt);
4349 tree vec_dest;
4350 gimple inc;
4351 tree ptr;
4352 tree data_ref;
4353 gimple new_stmt;
4354 basic_block new_bb;
4355 tree msq_init = NULL_TREE;
4356 tree new_temp;
4357 gimple phi_stmt;
4358 tree msq = NULL_TREE;
4359 gimple_seq stmts = NULL;
4360 bool inv_p;
4361 bool compute_in_loop = false;
4362 bool nested_in_vect_loop = false;
4363 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4364 struct loop *loop_for_initial_load = NULL;
4366 if (loop_vinfo)
4368 loop = LOOP_VINFO_LOOP (loop_vinfo);
4369 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4372 gcc_assert (alignment_support_scheme == dr_explicit_realign
4373 || alignment_support_scheme == dr_explicit_realign_optimized);
4375 /* We need to generate three things:
4376 1. the misalignment computation
4377 2. the extra vector load (for the optimized realignment scheme).
4378 3. the phi node for the two vectors from which the realignment is
4379 done (for the optimized realignment scheme). */
4381 /* 1. Determine where to generate the misalignment computation.
4383 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4384 calculation will be generated by this function, outside the loop (in the
4385 preheader). Otherwise, INIT_ADDR has already been computed for us by the
4386 caller, inside the loop.
4388 Background: If the misalignment remains fixed throughout the iterations of
4389 the loop, then both realignment schemes are applicable, and also the
4390 misalignment computation can be done outside LOOP. This is because we are
4391 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4392 are a multiple of VS (the Vector Size), and therefore the misalignment in
4393 different vectorized LOOP iterations is always the same.
4394 The problem arises only if the memory access is in an inner-loop nested
4395 inside LOOP, which is now being vectorized using outer-loop vectorization.
4396 This is the only case when the misalignment of the memory access may not
4397 remain fixed throughout the iterations of the inner-loop (as explained in
4398 detail in vect_supportable_dr_alignment). In this case, not only is the
4399 optimized realignment scheme not applicable, but also the misalignment
4400 computation (and generation of the realignment token that is passed to
4401 REALIGN_LOAD) has to be done inside the loop.
4403 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4404 or not, which in turn determines if the misalignment is computed inside
4405 the inner-loop, or outside LOOP. */
4407 if (init_addr != NULL_TREE || !loop_vinfo)
4409 compute_in_loop = true;
4410 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4414 /* 2. Determine where to generate the extra vector load.
4416 For the optimized realignment scheme, instead of generating two vector
4417 loads in each iteration, we generate a single extra vector load in the
4418 preheader of the loop, and in each iteration reuse the result of the
4419 vector load from the previous iteration. In case the memory access is in
4420 an inner-loop nested inside LOOP, which is now being vectorized using
4421 outer-loop vectorization, we need to determine whether this initial vector
4422 load should be generated at the preheader of the inner-loop, or can be
4423 generated at the preheader of LOOP. If the memory access has no evolution
4424 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4425 to be generated inside LOOP (in the preheader of the inner-loop). */
4427 if (nested_in_vect_loop)
4429 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4430 bool invariant_in_outerloop =
4431 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4432 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4434 else
4435 loop_for_initial_load = loop;
4436 if (at_loop)
4437 *at_loop = loop_for_initial_load;
4439 if (loop_for_initial_load)
4440 pe = loop_preheader_edge (loop_for_initial_load);
4442 /* 3. For the case of the optimized realignment, create the first vector
4443 load at the loop preheader. */
4445 if (alignment_support_scheme == dr_explicit_realign_optimized)
4447 /* Create msq_init = *(floor(p1)) in the loop preheader */
4449 gcc_assert (!compute_in_loop);
4450 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4451 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4452 NULL_TREE, &init_addr, NULL, &inc,
4453 true, &inv_p);
4454 new_temp = copy_ssa_name (ptr, NULL);
4455 new_stmt = gimple_build_assign_with_ops
4456 (BIT_AND_EXPR, new_temp, ptr,
4457 build_int_cst (TREE_TYPE (ptr),
4458 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4459 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4460 gcc_assert (!new_bb);
4461 data_ref
4462 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4463 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4464 new_stmt = gimple_build_assign (vec_dest, data_ref);
4465 new_temp = make_ssa_name (vec_dest, new_stmt);
4466 gimple_assign_set_lhs (new_stmt, new_temp);
4467 if (pe)
4469 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4470 gcc_assert (!new_bb);
4472 else
4473 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4475 msq_init = gimple_assign_lhs (new_stmt);
4478 /* 4. Create realignment token using a target builtin, if available.
4479 It is done either inside the containing loop, or before LOOP (as
4480 determined above). */
4482 if (targetm.vectorize.builtin_mask_for_load)
4484 tree builtin_decl;
4486 /* Compute INIT_ADDR - the initial address accessed by this memref. */
4487 if (!init_addr)
4489 /* Generate the INIT_ADDR computation outside LOOP. */
4490 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4491 NULL_TREE, loop);
4492 if (loop)
4494 pe = loop_preheader_edge (loop);
4495 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4496 gcc_assert (!new_bb);
4498 else
4499 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4502 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4503 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4504 vec_dest =
4505 vect_create_destination_var (scalar_dest,
4506 gimple_call_return_type (new_stmt));
4507 new_temp = make_ssa_name (vec_dest, new_stmt);
4508 gimple_call_set_lhs (new_stmt, new_temp);
4510 if (compute_in_loop)
4511 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4512 else
4514 /* Generate the misalignment computation outside LOOP. */
4515 pe = loop_preheader_edge (loop);
4516 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4517 gcc_assert (!new_bb);
4520 *realignment_token = gimple_call_lhs (new_stmt);
4522 /* The result of the CALL_EXPR to this builtin is determined from
4523 the value of the parameter, and no global variables are touched,
4524 which makes the builtin a "const" function. Requiring the
4525 builtin to have the "const" attribute makes it unnecessary
4526 to call mark_call_clobbered. */
4527 gcc_assert (TREE_READONLY (builtin_decl));
4530 if (alignment_support_scheme == dr_explicit_realign)
4531 return msq;
4533 gcc_assert (!compute_in_loop);
4534 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4537 /* 5. Create msq = phi <msq_init, lsq> in loop */
4539 pe = loop_preheader_edge (containing_loop);
4540 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4541 msq = make_ssa_name (vec_dest, NULL);
4542 phi_stmt = create_phi_node (msq, containing_loop->header);
4543 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4545 return msq;
4549 /* Function vect_grouped_load_supported.
4551 Returns TRUE if even and odd permutations are supported,
4552 and FALSE otherwise. */
4554 bool
4555 vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4557 enum machine_mode mode = TYPE_MODE (vectype);
4559 /* vect_permute_load_chain requires the group size to be a power of two. */
4560 if (exact_log2 (count) == -1)
4562 if (dump_enabled_p ())
4563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4564 "the size of the group of accesses"
4565 " is not a power of 2");
4566 return false;
4569 /* Check that the permutation is supported. */
4570 if (VECTOR_MODE_P (mode))
4572 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4573 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4575 for (i = 0; i < nelt; i++)
4576 sel[i] = i * 2;
4577 if (can_vec_perm_p (mode, false, sel))
4579 for (i = 0; i < nelt; i++)
4580 sel[i] = i * 2 + 1;
4581 if (can_vec_perm_p (mode, false, sel))
4582 return true;
4586 if (dump_enabled_p ())
4587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4588 "extract even/odd not supported by target");
4589 return false;
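/* As a worked instance (assuming a vector mode with nelt == 4): the two
   selectors checked above are the extract-even mask {0, 2, 4, 6} and the
   extract-odd mask {1, 3, 5, 7}, indexing into the concatenation of the
   two input vectors.  */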
4592 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4593 type VECTYPE. */
4595 bool
4596 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4598 return vect_lanes_optab_supported_p ("vec_load_lanes",
4599 vec_load_lanes_optab,
4600 vectype, count);
4603 /* Function vect_permute_load_chain.
4605 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4606 a power of 2, generate extract_even/odd stmts to reorder the input data
4607 correctly. Return the final references for loads in RESULT_CHAIN.
4609 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4610 The input is 4 vectors each containing 8 elements. We assign a number to each
4611 element; the input sequence is:
4613 1st vec: 0 1 2 3 4 5 6 7
4614 2nd vec: 8 9 10 11 12 13 14 15
4615 3rd vec: 16 17 18 19 20 21 22 23
4616 4th vec: 24 25 26 27 28 29 30 31
4618 The output sequence should be:
4620 1st vec: 0 4 8 12 16 20 24 28
4621 2nd vec: 1 5 9 13 17 21 25 29
4622 3rd vec: 2 6 10 14 18 22 26 30
4623 4th vec: 3 7 11 15 19 23 27 31
4625 i.e., the first output vector should contain the first elements of each
4626 interleaving group, etc.
4628 We use extract_even/odd instructions to create such output. The input of
4629 each extract_even/odd operation is two vectors
4630 1st vec 2nd vec
4631 0 1 2 3 4 5 6 7
4633 and the output is the vector of extracted even/odd elements. The output of
4634 extract_even will be: 0 2 4 6
4635 and of extract_odd: 1 3 5 7
4638 The permutation is done in log LENGTH stages. In each stage extract_even
4639 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4640 their order. In our example,
4642 E1: extract_even (1st vec, 2nd vec)
4643 E2: extract_odd (1st vec, 2nd vec)
4644 E3: extract_even (3rd vec, 4th vec)
4645 E4: extract_odd (3rd vec, 4th vec)
4647 The output for the first stage will be:
4649 E1: 0 2 4 6 8 10 12 14
4650 E2: 1 3 5 7 9 11 13 15
4651 E3: 16 18 20 22 24 26 28 30
4652 E4: 17 19 21 23 25 27 29 31
4654 In order to proceed and create the correct sequence for the next stage (or
4655 for the correct output, if the second stage is the last one, as in our
4656 example), we first put the output of extract_even operation and then the
4657 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4658 The input for the second stage is:
4660 1st vec (E1): 0 2 4 6 8 10 12 14
4661 2nd vec (E3): 16 18 20 22 24 26 28 30
4662 3rd vec (E2): 1 3 5 7 9 11 13 15
4663 4th vec (E4): 17 19 21 23 25 27 29 31
4665 The output of the second stage:
4667 E1: 0 4 8 12 16 20 24 28
4668 E2: 2 6 10 14 18 22 26 30
4669 E3: 1 5 9 13 17 21 25 29
4670 E4: 3 7 11 15 19 23 27 31
4672 And RESULT_CHAIN after reordering:
4674 1st vec (E1): 0 4 8 12 16 20 24 28
4675 2nd vec (E3): 1 5 9 13 17 21 25 29
4676 3rd vec (E2): 2 6 10 14 18 22 26 30
4677 4th vec (E4): 3 7 11 15 19 23 27 31. */
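/* Concretely, for the 8-element vectors in the example above, the two
   VEC_PERM_EXPR selectors built below are

     perm_mask_even = {0, 2, 4, 6, 8, 10, 12, 14}
     perm_mask_odd  = {1, 3, 5, 7, 9, 11, 13, 15}

   where indices 0..7 pick elements of FIRST_VECT and 8..15 pick elements
   of SECOND_VECT.  */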
4679 static void
4680 vect_permute_load_chain (vec<tree> dr_chain,
4681 unsigned int length,
4682 gimple stmt,
4683 gimple_stmt_iterator *gsi,
4684 vec<tree> *result_chain)
4686 tree data_ref, first_vect, second_vect;
4687 tree perm_mask_even, perm_mask_odd;
4688 gimple perm_stmt;
4689 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4690 unsigned int i, j, log_length = exact_log2 (length);
4691 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4692 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4694 *result_chain = dr_chain.copy ();
4696 for (i = 0; i < nelt; ++i)
4697 sel[i] = i * 2;
4698 perm_mask_even = vect_gen_perm_mask (vectype, sel);
4699 gcc_assert (perm_mask_even != NULL);
4701 for (i = 0; i < nelt; ++i)
4702 sel[i] = i * 2 + 1;
4703 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4704 gcc_assert (perm_mask_odd != NULL);
4706 for (i = 0; i < log_length; i++)
4708 for (j = 0; j < length; j += 2)
4710 first_vect = dr_chain[j];
4711 second_vect = dr_chain[j+1];
4713 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4714 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
4715 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4716 first_vect, second_vect,
4717 perm_mask_even);
4718 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4719 (*result_chain)[j/2] = data_ref;
4721 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4722 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
4723 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4724 first_vect, second_vect,
4725 perm_mask_odd);
4726 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4727 (*result_chain)[j/2+length/2] = data_ref;
4729 dr_chain = result_chain->copy ();
4734 /* Function vect_transform_grouped_load.
4736 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4737 to perform their permutation and ascribe the resulting vectorized statements to
4738 the scalar statements.
4741 void
4742 vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
4743 gimple_stmt_iterator *gsi)
4745 vec<tree> result_chain = vNULL;
4747 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4748 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4749 vectors, that are ready for vector computation. */
4750 result_chain.create (size);
4751 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4752 vect_record_grouped_load_vectors (stmt, result_chain);
4753 result_chain.release ();
4756 /* RESULT_CHAIN contains the output of a group of grouped loads that were
4757 generated as part of the vectorization of STMT. Assign the statement
4758 for each vector to the associated scalar statement. */
4760 void
4761 vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
4763 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4764 gimple next_stmt, new_stmt;
4765 unsigned int i, gap_count;
4766 tree tmp_data_ref;
4768 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4769 Since we scan the chain starting from its first node, their order
4770 corresponds to the order of data-refs in RESULT_CHAIN.
4771 next_stmt = first_stmt;
4772 gap_count = 1;
4773 FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
4775 if (!next_stmt)
4776 break;
4778 /* Skip the gaps. Loads created for the gaps will be removed by the dead
4779 code elimination pass later. No need to check for the first stmt in
4780 the group, since it always exists.
4781 GROUP_GAP is the number of steps in elements from the previous
4782 access (if there is no gap GROUP_GAP is 1). We skip loads that
4783 correspond to the gaps. */
4784 if (next_stmt != first_stmt
4785 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4787 gap_count++;
4788 continue;
4791 while (next_stmt)
4793 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4794 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4795 copies, and we put the new vector statement in the first available
4796 RELATED_STMT. */
4797 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4798 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4799 else
4801 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4803 gimple prev_stmt =
4804 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4805 gimple rel_stmt =
4806 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4807 while (rel_stmt)
4809 prev_stmt = rel_stmt;
4810 rel_stmt =
4811 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4814 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4815 new_stmt;
4819 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4820 gap_count = 1;
4821 /* If NEXT_STMT accesses the same DR as the previous statement,
4822 put the same TMP_DATA_REF as its vectorized statement; otherwise
4823 get the next data-ref from RESULT_CHAIN. */
4824 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4825 break;
4830 /* Function vect_can_force_dr_alignment_p.
4832 Returns whether the alignment of a DECL can be forced to an
4833 ALIGNMENT-bit boundary. */
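/* Illustrative use (an assumption about the caller, not something this
   function prescribes): before raising DECL_ALIGN of a data reference's
   base BASE to the vector type's alignment, a caller would typically check

     if (vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
       ...increase the declared alignment...  */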
4835 bool
4836 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4838 if (TREE_CODE (decl) != VAR_DECL)
4839 return false;
4841 /* We cannot change alignment of common or external symbols as another
4842 translation unit may contain a definition with lower alignment.
4843 The rules of common symbol linking mean that the definition
4844 will override the common symbol. */
4845 if (DECL_EXTERNAL (decl)
4846 || DECL_COMMON (decl))
4847 return false;
4849 if (TREE_ASM_WRITTEN (decl))
4850 return false;
4852 /* Do not override the alignment as specified by the ABI when the used
4853 attribute is set. */
4854 if (DECL_PRESERVE_P (decl))
4855 return false;
4857 /* Do not override explicit alignment set by the user when an explicit
4858 section name is also used. This is a common idiom used by many
4859 software projects. */
4860 if (DECL_SECTION_NAME (decl) != NULL_TREE
4861 && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
4862 return false;
4864 if (TREE_STATIC (decl))
4865 return (alignment <= MAX_OFILE_ALIGNMENT);
4866 else
4867 return (alignment <= MAX_STACK_ALIGNMENT);
4871 /* Return whether the data reference DR is supported with respect to its
4872 alignment.
4873 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4874 if it is aligned, i.e., check if it is possible to vectorize it with different
4875 alignment. */
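/* Caller-side sketch (illustrative only): the returned classification is
   typically dispatched on as

     switch (vect_supportable_dr_alignment (dr, false))
       {
       case dr_aligned:
       case dr_unaligned_supported:
         ...emit the access directly...
       case dr_explicit_realign:
       case dr_explicit_realign_optimized:
         ...use vect_setup_realignment and REALIGN_LOAD...
       default:
         ...the access cannot be vectorized with this alignment...
       }  */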
4877 enum dr_alignment_support
4878 vect_supportable_dr_alignment (struct data_reference *dr,
4879 bool check_aligned_accesses)
4881 gimple stmt = DR_STMT (dr);
4882 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4883 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4884 enum machine_mode mode = TYPE_MODE (vectype);
4885 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4886 struct loop *vect_loop = NULL;
4887 bool nested_in_vect_loop = false;
4889 if (aligned_access_p (dr) && !check_aligned_accesses)
4890 return dr_aligned;
4892 if (loop_vinfo)
4894 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4895 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4898 /* Possibly unaligned access. */
4900 /* We can choose between using the implicit realignment scheme (generating
4901 a misaligned_move stmt) and the explicit realignment scheme (generating
4902 aligned loads with a REALIGN_LOAD). There are two variants to the
4903 explicit realignment scheme: optimized, and unoptimized.
4904 We can optimize the realignment only if the step between consecutive
4905 vector loads is equal to the vector size. Since the vector memory
4906 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4907 is guaranteed that the misalignment amount remains the same throughout the
4908 execution of the vectorized loop. Therefore, we can create the
4909 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4910 at the loop preheader.
4912 However, in the case of outer-loop vectorization, when vectorizing a
4913 memory access in the inner-loop nested within the LOOP that is now being
4914 vectorized, while it is guaranteed that the misalignment of the
4915 vectorized memory access will remain the same in different outer-loop
4916 iterations, it is *not* guaranteed that it will remain the same throughout
4917 the execution of the inner-loop. This is because the inner-loop advances
4918 with the original scalar step (and not in steps of VS). If the inner-loop
4919 step happens to be a multiple of VS, then the misalignment remains fixed
4920 and we can use the optimized realignment scheme. For example:
4922 for (i=0; i<N; i++)
4923 for (j=0; j<M; j++)
4924 s += a[i+j];
4926 When vectorizing the i-loop in the above example, the step between
4927 consecutive vector loads is 1, and so the misalignment does not remain
4928 fixed across the execution of the inner-loop, and the realignment cannot
4929 be optimized (as illustrated in the following pseudo vectorized loop):
4931 for (i=0; i<N; i+=4)
4932 for (j=0; j<M; j++){
4933 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4934 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4935 // (assuming that we start from an aligned address).
4938 We therefore have to use the unoptimized realignment scheme:
4940 for (i=0; i<N; i+=4)
4941 for (j=k; j<M; j+=4)
4942 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4943 // that the misalignment of the initial address is
4944 // 0).
4946 The loop can then be vectorized as follows:
4948 for (k=0; k<4; k++){
4949 rt = get_realignment_token (&vp[k]);
4950 for (i=0; i<N; i+=4){
4951 v1 = vp[i+k];
4952 for (j=k; j<M; j+=4){
4953 v2 = vp[i+j+VS-1];
4954 va = REALIGN_LOAD <v1,v2,rt>;
4955 vs += va;
4956 v1 = v2;
4959 } */
4961 if (DR_IS_READ (dr))
4963 bool is_packed = false;
4964 tree type = (TREE_TYPE (DR_REF (dr)));
4966 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4967 && (!targetm.vectorize.builtin_mask_for_load
4968 || targetm.vectorize.builtin_mask_for_load ()))
4970 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4971 if ((nested_in_vect_loop
4972 && (TREE_INT_CST_LOW (DR_STEP (dr))
4973 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4974 || !loop_vinfo)
4975 return dr_explicit_realign;
4976 else
4977 return dr_explicit_realign_optimized;
4979 if (!known_alignment_for_access_p (dr))
4980 is_packed = not_size_aligned (DR_REF (dr));
4982 if (targetm.vectorize.
4983 support_vector_misalignment (mode, type,
4984 DR_MISALIGNMENT (dr), is_packed))
4985 /* Can't software pipeline the loads, but can at least do them. */
4986 return dr_unaligned_supported;
4988 else
4990 bool is_packed = false;
4991 tree type = (TREE_TYPE (DR_REF (dr)));
4993 if (!known_alignment_for_access_p (dr))
4994 is_packed = not_size_aligned (DR_REF (dr));
4996 if (targetm.vectorize.
4997 support_vector_misalignment (mode, type,
4998 DR_MISALIGNMENT (dr), is_packed))
4999 return dr_unaligned_supported;
5002 /* Unsupported. */
5003 return dr_unaligned_unsupported;