missing Changelog
[official-gcc.git] / gcc / tree-vect-data-refs.c
blob44fe374f965f57c001733e80f438c422003e6549
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "dumpfile.h"
27 #include "tm.h"
28 #include "ggc.h"
29 #include "tree.h"
30 #include "tm_p.h"
31 #include "target.h"
32 #include "basic-block.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "dumpfile.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
43 #include "expr.h"
44 #include "optabs.h"
46 /* Return true if load- or store-lanes optab OPTAB is implemented for
47 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
49 static bool
50 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
51 tree vectype, unsigned HOST_WIDE_INT count)
53 enum machine_mode mode, array_mode;
54 bool limit_p;
56 mode = TYPE_MODE (vectype);
57 limit_p = !targetm.array_mode_supported_p (mode, count);
58 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
59 MODE_INT, limit_p);
61 if (array_mode == BLKmode)
63 if (dump_enabled_p ())
64 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
65 "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
66 GET_MODE_NAME (mode), count);
67 return false;
70 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
72 if (dump_enabled_p ())
73 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
74 "cannot use %s<%s><%s>", name,
75 GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
76 return false;
79 if (dump_enabled_p ())
80 dump_printf_loc (MSG_NOTE, vect_location,
81 "can use %s<%s><%s>", name, GET_MODE_NAME (array_mode),
82 GET_MODE_NAME (mode));
84 return true;
88 /* Return the smallest scalar part of STMT.
89 This is used to determine the vectype of the stmt. We generally set the
90 vectype according to the type of the result (lhs). For stmts whose
91 result-type is different than the type of the arguments (e.g., demotion,
92 promotion), vectype will be reset appropriately (later). Note that we have
93 to visit the smallest datatype in this function, because that determines the
94 VF. If the smallest datatype in the loop is present only as the rhs of a
95 promotion operation - we'd miss it.
96 Such a case, where a variable of this datatype does not appear in the lhs
97 anywhere in the loop, can only occur if it's an invariant: e.g.:
98 'int_x = (int) short_inv', which we'd expect to have been optimized away by
99 invariant motion. However, we cannot rely on invariant motion to always
100 take invariants out of the loop, and so in the case of promotion we also
101 have to check the rhs.
102 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
103 types. */
105 tree
106 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
107 HOST_WIDE_INT *rhs_size_unit)
109 tree scalar_type = gimple_expr_type (stmt);
110 HOST_WIDE_INT lhs, rhs;
112 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
114 if (is_gimple_assign (stmt)
115 && (gimple_assign_cast_p (stmt)
116 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
117 || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
118 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
120 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
122 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
123 if (rhs < lhs)
124 scalar_type = rhs_type;
127 *lhs_size_unit = lhs;
128 *rhs_size_unit = rhs;
129 return scalar_type;
133 /* Find the place of the data-ref in STMT in the interleaving chain that starts
134 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
137 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
139 gimple next_stmt = first_stmt;
140 int result = 0;
142 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
143 return -1;
145 while (next_stmt && next_stmt != stmt)
147 result++;
148 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
151 if (next_stmt)
152 return result;
153 else
154 return -1;
158 /* Function vect_insert_into_interleaving_chain.
160 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
162 static void
163 vect_insert_into_interleaving_chain (struct data_reference *dra,
164 struct data_reference *drb)
166 gimple prev, next;
167 tree next_init;
168 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
169 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
171 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
172 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
173 while (next)
175 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
176 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
178 /* Insert here. */
179 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
180 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
181 return;
183 prev = next;
184 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
187 /* We got to the end of the list. Insert here. */
188 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
189 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
193 /* Function vect_update_interleaving_chain.
195 For two data-refs DRA and DRB that are a part of a chain interleaved data
196 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
198 There are four possible cases:
199 1. New stmts - both DRA and DRB are not a part of any chain:
200 FIRST_DR = DRB
201 NEXT_DR (DRB) = DRA
202 2. DRB is a part of a chain and DRA is not:
203 no need to update FIRST_DR
204 no need to insert DRB
205 insert DRA according to init
206 3. DRA is a part of a chain and DRB is not:
207 if (init of FIRST_DR > init of DRB)
208 FIRST_DR = DRB
209 NEXT(FIRST_DR) = previous FIRST_DR
210 else
211 insert DRB according to its init
212 4. both DRA and DRB are in some interleaving chains:
213 choose the chain with the smallest init of FIRST_DR
214 insert the nodes of the second chain into the first one. */
216 static void
217 vect_update_interleaving_chain (struct data_reference *drb,
218 struct data_reference *dra)
220 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
221 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
222 tree next_init, init_dra_chain, init_drb_chain;
223 gimple first_a, first_b;
224 tree node_init;
225 gimple node, prev, next, first_stmt;
227 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
228 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
230 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
231 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
232 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
233 return;
236 /* 2. DRB is a part of a chain and DRA is not. */
237 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
239 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
240 /* Insert DRA into the chain of DRB. */
241 vect_insert_into_interleaving_chain (dra, drb);
242 return;
245 /* 3. DRA is a part of a chain and DRB is not. */
246 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
248 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
249 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
250 old_first_stmt)));
251 gimple tmp;
253 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
255 /* DRB's init is smaller than the init of the stmt previously marked
256 as the first stmt of the interleaving chain of DRA. Therefore, we
257 update FIRST_STMT and put DRB in the head of the list. */
258 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
259 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
261 /* Update all the stmts in the list to point to the new FIRST_STMT. */
262 tmp = old_first_stmt;
263 while (tmp)
265 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
266 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
269 else
271 /* Insert DRB in the list of DRA. */
272 vect_insert_into_interleaving_chain (drb, dra);
273 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
275 return;
278 /* 4. both DRA and DRB are in some interleaving chains. */
279 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
280 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
281 if (first_a == first_b)
282 return;
283 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
284 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
286 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
288 /* Insert the nodes of DRA chain into the DRB chain.
289 After inserting a node, continue from this node of the DRB chain (don't
290 start from the beginning. */
291 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
292 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
293 first_stmt = first_b;
295 else
297 /* Insert the nodes of DRB chain into the DRA chain.
298 After inserting a node, continue from this node of the DRA chain (don't
299 start from the beginning. */
300 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
301 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
302 first_stmt = first_a;
305 while (node)
307 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
308 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
309 while (next)
311 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
312 if (tree_int_cst_compare (next_init, node_init) > 0)
314 /* Insert here. */
315 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
316 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
317 prev = node;
318 break;
320 prev = next;
321 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
323 if (!next)
325 /* We got to the end of the list. Insert here. */
326 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
327 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
328 prev = node;
330 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
331 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
335 /* Check dependence between DRA and DRB for basic block vectorization.
336 If the accesses share same bases and offsets, we can compare their initial
337 constant offsets to decide whether they differ or not. In case of a read-
338 write dependence we check that the load is before the store to ensure that
339 vectorization will not change the order of the accesses. */
341 static bool
342 vect_drs_dependent_in_basic_block (struct data_reference *dra,
343 struct data_reference *drb)
345 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
346 gimple earlier_stmt;
348 /* We only call this function for pairs of loads and stores, but we verify
349 it here. */
350 if (DR_IS_READ (dra) == DR_IS_READ (drb))
352 if (DR_IS_READ (dra))
353 return false;
354 else
355 return true;
358 /* Check that the data-refs have same bases and offsets. If not, we can't
359 determine if they are dependent. */
360 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
361 || !dr_equal_offsets_p (dra, drb))
362 return true;
364 /* Check the types. */
365 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
366 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
368 if (type_size_a != type_size_b
369 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
370 TREE_TYPE (DR_REF (drb))))
371 return true;
373 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
374 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
376 /* Two different locations - no dependence. */
377 if (init_a != init_b)
378 return false;
380 /* We have a read-write dependence. Check that the load is before the store.
381 When we vectorize basic blocks, vector load can be only before
382 corresponding scalar load, and vector store can be only after its
383 corresponding scalar store. So the order of the acceses is preserved in
384 case the load is before the store. */
385 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
386 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
387 return false;
389 return true;
393 /* Function vect_check_interleaving.
395 Check if DRA and DRB are a part of interleaving. In case they are, insert
396 DRA and DRB in an interleaving chain. */
398 static bool
399 vect_check_interleaving (struct data_reference *dra,
400 struct data_reference *drb)
402 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
404 /* Check that the data-refs have same first location (except init) and they
405 are both either store or load (not load and store). */
406 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
407 || !dr_equal_offsets_p (dra, drb)
408 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
409 || DR_IS_READ (dra) != DR_IS_READ (drb))
410 return false;
412 /* Check:
413 1. data-refs are of the same type
414 2. their steps are equal
415 3. the step (if greater than zero) is greater than the difference between
416 data-refs' inits. */
417 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
418 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
420 if (type_size_a != type_size_b
421 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
422 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
423 TREE_TYPE (DR_REF (drb))))
424 return false;
426 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
427 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
428 step = TREE_INT_CST_LOW (DR_STEP (dra));
430 if (init_a > init_b)
432 /* If init_a == init_b + the size of the type * k, we have an interleaving,
433 and DRB is accessed before DRA. */
434 diff_mod_size = (init_a - init_b) % type_size_a;
436 if (step && (init_a - init_b) > step)
437 return false;
439 if (diff_mod_size == 0)
441 vect_update_interleaving_chain (drb, dra);
442 if (dump_enabled_p ())
444 dump_printf_loc (MSG_NOTE, vect_location,
445 "Detected interleaving ");
446 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
447 dump_printf (MSG_NOTE, " and ");
448 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
450 return true;
453 else
455 /* If init_b == init_a + the size of the type * k, we have an
456 interleaving, and DRA is accessed before DRB. */
457 diff_mod_size = (init_b - init_a) % type_size_a;
459 if (step && (init_b - init_a) > step)
460 return false;
462 if (diff_mod_size == 0)
464 vect_update_interleaving_chain (dra, drb);
465 if (dump_enabled_p ())
467 dump_printf_loc (MSG_NOTE, vect_location,
468 "Detected interleaving ");
469 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
470 dump_printf (MSG_NOTE, " and ");
471 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
473 return true;
477 return false;
480 /* Check if data references pointed by DR_I and DR_J are same or
481 belong to same interleaving group. Return FALSE if drs are
482 different, otherwise return TRUE. */
484 static bool
485 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
487 gimple stmt_i = DR_STMT (dr_i);
488 gimple stmt_j = DR_STMT (dr_j);
490 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
491 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
492 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
493 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
494 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
495 return true;
496 else
497 return false;
500 /* If address ranges represented by DDR_I and DDR_J are equal,
501 return TRUE, otherwise return FALSE. */
503 static bool
504 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
506 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
507 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
508 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
509 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
510 return true;
511 else
512 return false;
515 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
516 tested at run-time. Return TRUE if DDR was successfully inserted.
517 Return false if versioning is not supported. */
519 static bool
520 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
522 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
524 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
525 return false;
527 if (dump_enabled_p ())
529 dump_printf_loc (MSG_NOTE, vect_location,
530 "mark for run-time aliasing test between ");
531 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
532 dump_printf (MSG_NOTE, " and ");
533 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
536 if (optimize_loop_nest_for_size_p (loop))
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
540 "versioning not supported when optimizing for size.");
541 return false;
544 /* FORNOW: We don't support versioning with outer-loop vectorization. */
545 if (loop->inner)
547 if (dump_enabled_p ())
548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
549 "versioning not yet supported for outer-loops.");
550 return false;
553 /* FORNOW: We don't support creating runtime alias tests for non-constant
554 step. */
555 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
556 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
558 if (dump_enabled_p ())
559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
560 "versioning not yet supported for non-constant "
561 "step");
562 return false;
565 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
566 return true;
570 /* Function vect_analyze_data_ref_dependence.
572 Return TRUE if there (might) exist a dependence between a memory-reference
573 DRA and a memory-reference DRB. When versioning for alias may check a
574 dependence at run-time, return FALSE. Adjust *MAX_VF according to
575 the data dependence. */
577 static bool
578 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
579 loop_vec_info loop_vinfo, int *max_vf)
581 unsigned int i;
582 struct loop *loop = NULL;
583 struct data_reference *dra = DDR_A (ddr);
584 struct data_reference *drb = DDR_B (ddr);
585 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
586 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
587 lambda_vector dist_v;
588 unsigned int loop_depth;
590 /* Don't bother to analyze statements marked as unvectorizable. */
591 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
592 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
593 return false;
595 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
597 /* Independent data accesses. */
598 vect_check_interleaving (dra, drb);
599 return false;
602 if (loop_vinfo)
603 loop = LOOP_VINFO_LOOP (loop_vinfo);
605 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
606 return false;
608 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
610 gimple earlier_stmt;
612 if (loop_vinfo)
614 if (dump_enabled_p ())
616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
617 "versioning for alias required: "
618 "can't determine dependence between ");
619 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
620 DR_REF (dra));
621 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
622 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
623 DR_REF (drb));
626 /* Add to list of ddrs that need to be tested at run-time. */
627 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
630 /* When vectorizing a basic block unknown depnedence can still mean
631 grouped access. */
632 if (vect_check_interleaving (dra, drb))
633 return false;
635 /* Read-read is OK (we need this check here, after checking for
636 interleaving). */
637 if (DR_IS_READ (dra) && DR_IS_READ (drb))
638 return false;
640 if (dump_enabled_p ())
642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
643 "can't determine dependence between ");
644 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
645 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
646 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
649 /* We do not vectorize basic blocks with write-write dependencies. */
650 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
651 return true;
653 /* Check that it's not a load-after-store dependence. */
654 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
655 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
656 return true;
658 return false;
661 /* Versioning for alias is not yet supported for basic block SLP, and
662 dependence distance is unapplicable, hence, in case of known data
663 dependence, basic block vectorization is impossible for now. */
664 if (!loop_vinfo)
666 if (dra != drb && vect_check_interleaving (dra, drb))
667 return false;
669 if (dump_enabled_p ())
671 dump_printf_loc (MSG_NOTE, vect_location,
672 "determined dependence between ");
673 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
674 dump_printf (MSG_NOTE, " and ");
675 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
678 /* Do not vectorize basic blcoks with write-write dependences. */
679 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
680 return true;
682 /* Check if this dependence is allowed in basic block vectorization. */
683 return vect_drs_dependent_in_basic_block (dra, drb);
686 /* Loop-based vectorization and known data dependence. */
687 if (DDR_NUM_DIST_VECTS (ddr) == 0)
689 if (dump_enabled_p ())
691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
692 "versioning for alias required: "
693 "bad dist vector for ");
694 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
695 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
696 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
698 /* Add to list of ddrs that need to be tested at run-time. */
699 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
702 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
703 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
705 int dist = dist_v[loop_depth];
707 if (dump_enabled_p ())
708 dump_printf_loc (MSG_NOTE, vect_location,
709 "dependence distance = %d.", dist);
711 if (dist == 0)
713 if (dump_enabled_p ())
715 dump_printf_loc (MSG_NOTE, vect_location,
716 "dependence distance == 0 between ");
717 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
718 dump_printf (MSG_NOTE, " and ");
719 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
722 /* For interleaving, mark that there is a read-write dependency if
723 necessary. We check before that one of the data-refs is store. */
724 if (DR_IS_READ (dra))
725 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
726 else
728 if (DR_IS_READ (drb))
729 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
732 continue;
735 if (dist > 0 && DDR_REVERSED_P (ddr))
737 /* If DDR_REVERSED_P the order of the data-refs in DDR was
738 reversed (to make distance vector positive), and the actual
739 distance is negative. */
740 if (dump_enabled_p ())
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
742 "dependence distance negative.");
743 continue;
746 if (abs (dist) >= 2
747 && abs (dist) < *max_vf)
749 /* The dependence distance requires reduction of the maximal
750 vectorization factor. */
751 *max_vf = abs (dist);
752 if (dump_enabled_p ())
753 dump_printf_loc (MSG_NOTE, vect_location,
754 "adjusting maximal vectorization factor to %i",
755 *max_vf);
758 if (abs (dist) >= *max_vf)
760 /* Dependence distance does not create dependence, as far as
761 vectorization is concerned, in this case. */
762 if (dump_enabled_p ())
763 dump_printf_loc (MSG_NOTE, vect_location,
764 "dependence distance >= VF.");
765 continue;
768 if (dump_enabled_p ())
770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
771 "not vectorized, possible dependence "
772 "between data-refs ");
773 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
774 dump_printf (MSG_NOTE, " and ");
775 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
778 return true;
781 return false;
784 /* Function vect_analyze_data_ref_dependences.
786 Examine all the data references in the loop, and make sure there do not
787 exist any data dependences between them. Set *MAX_VF according to
788 the maximum vectorization factor the data dependences allow. */
790 bool
791 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
792 bb_vec_info bb_vinfo, int *max_vf)
794 unsigned int i;
795 vec<ddr_p> ddrs = vNULL;
796 struct data_dependence_relation *ddr;
798 if (dump_enabled_p ())
799 dump_printf_loc (MSG_NOTE, vect_location,
800 "=== vect_analyze_dependences ===");
801 if (loop_vinfo)
802 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
803 else
804 ddrs = BB_VINFO_DDRS (bb_vinfo);
806 FOR_EACH_VEC_ELT (ddrs, i, ddr)
807 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
808 return false;
810 return true;
814 /* Function vect_compute_data_ref_alignment
816 Compute the misalignment of the data reference DR.
818 Output:
819 1. If during the misalignment computation it is found that the data reference
820 cannot be vectorized then false is returned.
821 2. DR_MISALIGNMENT (DR) is defined.
823 FOR NOW: No analysis is actually performed. Misalignment is calculated
824 only for trivial cases. TODO. */
826 static bool
827 vect_compute_data_ref_alignment (struct data_reference *dr)
829 gimple stmt = DR_STMT (dr);
830 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
831 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
832 struct loop *loop = NULL;
833 tree ref = DR_REF (dr);
834 tree vectype;
835 tree base, base_addr;
836 bool base_aligned;
837 tree misalign;
838 tree aligned_to, alignment;
840 if (dump_enabled_p ())
841 dump_printf_loc (MSG_NOTE, vect_location,
842 "vect_compute_data_ref_alignment:");
844 if (loop_vinfo)
845 loop = LOOP_VINFO_LOOP (loop_vinfo);
847 /* Initialize misalignment to unknown. */
848 SET_DR_MISALIGNMENT (dr, -1);
850 /* Strided loads perform only component accesses, misalignment information
851 is irrelevant for them. */
852 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
853 return true;
855 misalign = DR_INIT (dr);
856 aligned_to = DR_ALIGNED_TO (dr);
857 base_addr = DR_BASE_ADDRESS (dr);
858 vectype = STMT_VINFO_VECTYPE (stmt_info);
860 /* In case the dataref is in an inner-loop of the loop that is being
861 vectorized (LOOP), we use the base and misalignment information
862 relative to the outer-loop (LOOP). This is ok only if the misalignment
863 stays the same throughout the execution of the inner-loop, which is why
864 we have to check that the stride of the dataref in the inner-loop evenly
865 divides by the vector size. */
866 if (loop && nested_in_vect_loop_p (loop, stmt))
868 tree step = DR_STEP (dr);
869 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
871 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
873 if (dump_enabled_p ())
874 dump_printf_loc (MSG_NOTE, vect_location,
875 "inner step divides the vector-size.");
876 misalign = STMT_VINFO_DR_INIT (stmt_info);
877 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
878 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
880 else
882 if (dump_enabled_p ())
883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
884 "inner step doesn't divide the vector-size.");
885 misalign = NULL_TREE;
889 /* Similarly, if we're doing basic-block vectorization, we can only use
890 base and misalignment information relative to an innermost loop if the
891 misalignment stays the same throughout the execution of the loop.
892 As above, this is the case if the stride of the dataref evenly divides
893 by the vector size. */
894 if (!loop)
896 tree step = DR_STEP (dr);
897 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
899 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
901 if (dump_enabled_p ())
902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
903 "SLP: step doesn't divide the vector-size.");
904 misalign = NULL_TREE;
908 base = build_fold_indirect_ref (base_addr);
909 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
911 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
912 || !misalign)
914 if (dump_enabled_p ())
916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
917 "Unknown alignment for access: ");
918 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
920 return true;
923 if ((DECL_P (base)
924 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
925 alignment) >= 0)
926 || (TREE_CODE (base_addr) == SSA_NAME
927 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
928 TREE_TYPE (base_addr)))),
929 alignment) >= 0)
930 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
931 base_aligned = true;
932 else
933 base_aligned = false;
935 if (!base_aligned)
937 /* Do not change the alignment of global variables here if
938 flag_section_anchors is enabled as we already generated
939 RTL for other functions. Most global variables should
940 have been aligned during the IPA increase_alignment pass. */
941 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
942 || (TREE_STATIC (base) && flag_section_anchors))
944 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE, vect_location,
947 "can't force alignment of ref: ");
948 dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
950 return true;
953 /* Force the alignment of the decl.
954 NOTE: This is the only change to the code we make during
955 the analysis phase, before deciding to vectorize the loop. */
956 if (dump_enabled_p ())
958 dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
959 dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
962 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
963 DECL_USER_ALIGN (base) = 1;
966 /* At this point we assume that the base is aligned. */
967 gcc_assert (base_aligned
968 || (TREE_CODE (base) == VAR_DECL
969 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
971 /* If this is a backward running DR then first access in the larger
972 vectype actually is N-1 elements before the address in the DR.
973 Adjust misalign accordingly. */
974 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
976 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
977 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
978 otherwise we wouldn't be here. */
979 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
980 /* PLUS because DR_STEP was negative. */
981 misalign = size_binop (PLUS_EXPR, misalign, offset);
984 /* Modulo alignment. */
985 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
987 if (!host_integerp (misalign, 1))
989 /* Negative or overflowed misalignment value. */
990 if (dump_enabled_p ())
991 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
992 "unexpected misalign value");
993 return false;
996 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
998 if (dump_enabled_p ())
1000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1001 "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1002 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
1005 return true;
1009 /* Function vect_compute_data_refs_alignment
1011 Compute the misalignment of data references in the loop.
1012 Return FALSE if a data reference is found that cannot be vectorized. */
1014 static bool
1015 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
1016 bb_vec_info bb_vinfo)
1018 vec<data_reference_p> datarefs;
1019 struct data_reference *dr;
1020 unsigned int i;
1022 if (loop_vinfo)
1023 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1024 else
1025 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1027 FOR_EACH_VEC_ELT (datarefs, i, dr)
1028 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
1029 && !vect_compute_data_ref_alignment (dr))
1031 if (bb_vinfo)
1033 /* Mark unsupported statement as unvectorizable. */
1034 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1035 continue;
1037 else
1038 return false;
1041 return true;
1045 /* Function vect_update_misalignment_for_peel
1047 DR - the data reference whose misalignment is to be adjusted.
1048 DR_PEEL - the data reference whose misalignment is being made
1049 zero in the vector loop by the peel.
1050 NPEEL - the number of iterations in the peel loop if the misalignment
1051 of DR_PEEL is known at compile time. */
1053 static void
1054 vect_update_misalignment_for_peel (struct data_reference *dr,
1055 struct data_reference *dr_peel, int npeel)
1057 unsigned int i;
1058 vec<dr_p> same_align_drs;
1059 struct data_reference *current_dr;
1060 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1061 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1062 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1063 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1065 /* For interleaved data accesses the step in the loop must be multiplied by
1066 the size of the interleaving group. */
1067 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1068 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1069 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
1070 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1072 /* It can be assumed that the data refs with the same alignment as dr_peel
1073 are aligned in the vector loop. */
1074 same_align_drs
1075 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1076 FOR_EACH_VEC_ELT (same_align_drs, i, current_dr)
1078 if (current_dr != dr)
1079 continue;
1080 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1081 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1082 SET_DR_MISALIGNMENT (dr, 0);
1083 return;
1086 if (known_alignment_for_access_p (dr)
1087 && known_alignment_for_access_p (dr_peel))
1089 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1090 int misal = DR_MISALIGNMENT (dr);
1091 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1092 misal += negative ? -npeel * dr_size : npeel * dr_size;
1093 misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
1094 SET_DR_MISALIGNMENT (dr, misal);
1095 return;
1098 if (dump_enabled_p ())
1099 dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.");
1100 SET_DR_MISALIGNMENT (dr, -1);
1104 /* Function vect_verify_datarefs_alignment
1106 Return TRUE if all data references in the loop can be
1107 handled with respect to alignment. */
1109 bool
1110 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1112 vec<data_reference_p> datarefs;
1113 struct data_reference *dr;
1114 enum dr_alignment_support supportable_dr_alignment;
1115 unsigned int i;
1117 if (loop_vinfo)
1118 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1119 else
1120 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1122 FOR_EACH_VEC_ELT (datarefs, i, dr)
1124 gimple stmt = DR_STMT (dr);
1125 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1127 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1128 continue;
1130 /* For interleaving, only the alignment of the first access matters.
1131 Skip statements marked as not vectorizable. */
1132 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
1133 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1134 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1135 continue;
1137 /* Strided loads perform only component accesses, alignment is
1138 irrelevant for them. */
1139 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1140 continue;
1142 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1143 if (!supportable_dr_alignment)
1145 if (dump_enabled_p ())
1147 if (DR_IS_READ (dr))
1148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1149 "not vectorized: unsupported unaligned load.");
1150 else
1151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1152 "not vectorized: unsupported unaligned "
1153 "store.");
1155 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1156 DR_REF (dr));
1158 return false;
1160 if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
1161 dump_printf_loc (MSG_NOTE, vect_location,
1162 "Vectorizing an unaligned access.");
1164 return true;
1167 /* Given an memory reference EXP return whether its alignment is less
1168 than its size. */
1170 static bool
1171 not_size_aligned (tree exp)
1173 if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp)), 1))
1174 return true;
1176 return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
1177 > get_object_alignment (exp));
1180 /* Function vector_alignment_reachable_p
1182 Return true if vector alignment for DR is reachable by peeling
1183 a few loop iterations. Return false otherwise. */
1185 static bool
1186 vector_alignment_reachable_p (struct data_reference *dr)
1188 gimple stmt = DR_STMT (dr);
1189 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1190 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1192 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1194 /* For interleaved access we peel only if number of iterations in
1195 the prolog loop ({VF - misalignment}), is a multiple of the
1196 number of the interleaved accesses. */
1197 int elem_size, mis_in_elements;
1198 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1200 /* FORNOW: handle only known alignment. */
1201 if (!known_alignment_for_access_p (dr))
1202 return false;
1204 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1205 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1207 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1208 return false;
1211 /* If misalignment is known at the compile time then allow peeling
1212 only if natural alignment is reachable through peeling. */
1213 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1215 HOST_WIDE_INT elmsize =
1216 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1217 if (dump_enabled_p ())
1219 dump_printf_loc (MSG_NOTE, vect_location,
1220 "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1221 dump_printf (MSG_NOTE,
1222 ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1224 if (DR_MISALIGNMENT (dr) % elmsize)
1226 if (dump_enabled_p ())
1227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1228 "data size does not divide the misalignment.\n");
1229 return false;
1233 if (!known_alignment_for_access_p (dr))
1235 tree type = TREE_TYPE (DR_REF (dr));
1236 bool is_packed = not_size_aligned (DR_REF (dr));
1237 if (dump_enabled_p ())
1238 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1239 "Unknown misalignment, is_packed = %d",is_packed);
1240 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1241 return true;
1242 else
1243 return false;
1246 return true;
1250 /* Calculate the cost of the memory access represented by DR. */
1252 static void
1253 vect_get_data_access_cost (struct data_reference *dr,
1254 unsigned int *inside_cost,
1255 unsigned int *outside_cost,
1256 stmt_vector_for_cost *body_cost_vec)
1258 gimple stmt = DR_STMT (dr);
1259 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1260 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1261 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1262 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1263 int ncopies = vf / nunits;
1265 if (DR_IS_READ (dr))
1266 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1267 NULL, body_cost_vec, false);
1268 else
1269 vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1271 if (dump_enabled_p ())
1272 dump_printf_loc (MSG_NOTE, vect_location,
1273 "vect_get_data_access_cost: inside_cost = %d, "
1274 "outside_cost = %d.", *inside_cost, *outside_cost);
1278 static hashval_t
1279 vect_peeling_hash (const void *elem)
1281 const struct _vect_peel_info *peel_info;
1283 peel_info = (const struct _vect_peel_info *) elem;
1284 return (hashval_t) peel_info->npeel;
1288 static int
1289 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1291 const struct _vect_peel_info *a, *b;
1293 a = (const struct _vect_peel_info *) elem1;
1294 b = (const struct _vect_peel_info *) elem2;
1295 return (a->npeel == b->npeel);
1299 /* Insert DR into peeling hash table with NPEEL as key. */
1301 static void
1302 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1303 int npeel)
1305 struct _vect_peel_info elem, *slot;
1306 void **new_slot;
1307 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1309 elem.npeel = npeel;
1310 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1311 &elem);
1312 if (slot)
1313 slot->count++;
1314 else
1316 slot = XNEW (struct _vect_peel_info);
1317 slot->npeel = npeel;
1318 slot->dr = dr;
1319 slot->count = 1;
1320 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1321 INSERT);
1322 *new_slot = slot;
1325 if (!supportable_dr_alignment && !flag_vect_cost_model)
1326 slot->count += VECT_MAX_COST;
1330 /* Traverse peeling hash table to find peeling option that aligns maximum
1331 number of data accesses. */
1333 static int
1334 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1336 vect_peel_info elem = (vect_peel_info) *slot;
1337 vect_peel_extended_info max = (vect_peel_extended_info) data;
1339 if (elem->count > max->peel_info.count
1340 || (elem->count == max->peel_info.count
1341 && max->peel_info.npeel > elem->npeel))
1343 max->peel_info.npeel = elem->npeel;
1344 max->peel_info.count = elem->count;
1345 max->peel_info.dr = elem->dr;
1348 return 1;
1352 /* Traverse peeling hash table and calculate cost for each peeling option.
1353 Find the one with the lowest cost. */
1355 static int
1356 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1358 vect_peel_info elem = (vect_peel_info) *slot;
1359 vect_peel_extended_info min = (vect_peel_extended_info) data;
1360 int save_misalignment, dummy;
1361 unsigned int inside_cost = 0, outside_cost = 0, i;
1362 gimple stmt = DR_STMT (elem->dr);
1363 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1364 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1365 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1366 struct data_reference *dr;
1367 stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
1368 int single_iter_cost;
1370 prologue_cost_vec.create (2);
1371 body_cost_vec.create (2);
1372 epilogue_cost_vec.create (2);
1374 FOR_EACH_VEC_ELT (datarefs, i, dr)
1376 stmt = DR_STMT (dr);
1377 stmt_info = vinfo_for_stmt (stmt);
1378 /* For interleaving, only the alignment of the first access
1379 matters. */
1380 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1381 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1382 continue;
1384 save_misalignment = DR_MISALIGNMENT (dr);
1385 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1386 vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
1387 &body_cost_vec);
1388 SET_DR_MISALIGNMENT (dr, save_misalignment);
1391 single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
1392 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
1393 &dummy, single_iter_cost,
1394 &prologue_cost_vec,
1395 &epilogue_cost_vec);
1397 /* Prologue and epilogue costs are added to the target model later.
1398 These costs depend only on the scalar iteration cost, the
1399 number of peeling iterations finally chosen, and the number of
1400 misaligned statements. So discard the information found here. */
1401 prologue_cost_vec.release ();
1402 epilogue_cost_vec.release ();
1404 if (inside_cost < min->inside_cost
1405 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1407 min->inside_cost = inside_cost;
1408 min->outside_cost = outside_cost;
1409 min->body_cost_vec.release ();
1410 min->body_cost_vec = body_cost_vec;
1411 min->peel_info.dr = elem->dr;
1412 min->peel_info.npeel = elem->npeel;
1414 else
1415 body_cost_vec.release ();
1417 return 1;
1421 /* Choose best peeling option by traversing peeling hash table and either
1422 choosing an option with the lowest cost (if cost model is enabled) or the
1423 option that aligns as many accesses as possible. */
1425 static struct data_reference *
1426 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1427 unsigned int *npeel,
1428 stmt_vector_for_cost *body_cost_vec)
1430 struct _vect_peel_extended_info res;
1432 res.peel_info.dr = NULL;
1433 res.body_cost_vec = stmt_vector_for_cost();
1435 if (flag_vect_cost_model)
1437 res.inside_cost = INT_MAX;
1438 res.outside_cost = INT_MAX;
1439 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1440 vect_peeling_hash_get_lowest_cost, &res);
1442 else
1444 res.peel_info.count = 0;
1445 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1446 vect_peeling_hash_get_most_frequent, &res);
1449 *npeel = res.peel_info.npeel;
1450 *body_cost_vec = res.body_cost_vec;
1451 return res.peel_info.dr;
1455 /* Function vect_enhance_data_refs_alignment
1457 This pass will use loop versioning and loop peeling in order to enhance
1458 the alignment of data references in the loop.
1460 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1461 original loop is to be vectorized. Any other loops that are created by
1462 the transformations performed in this pass - are not supposed to be
1463 vectorized. This restriction will be relaxed.
1465 This pass will require a cost model to guide it whether to apply peeling
1466 or versioning or a combination of the two. For example, the scheme that
1467 intel uses when given a loop with several memory accesses, is as follows:
1468 choose one memory access ('p') which alignment you want to force by doing
1469 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1470 other accesses are not necessarily aligned, or (2) use loop versioning to
1471 generate one loop in which all accesses are aligned, and another loop in
1472 which only 'p' is necessarily aligned.
1474 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1475 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1476 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1478 Devising a cost model is the most critical aspect of this work. It will
1479 guide us on which access to peel for, whether to use loop versioning, how
1480 many versions to create, etc. The cost model will probably consist of
1481 generic considerations as well as target specific considerations (on
1482 powerpc for example, misaligned stores are more painful than misaligned
1483 loads).
1485 Here are the general steps involved in alignment enhancements:
1487 -- original loop, before alignment analysis:
1488 for (i=0; i<N; i++){
1489 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1490 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1493 -- After vect_compute_data_refs_alignment:
1494 for (i=0; i<N; i++){
1495 x = q[i]; # DR_MISALIGNMENT(q) = 3
1496 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1499 -- Possibility 1: we do loop versioning:
1500 if (p is aligned) {
1501 for (i=0; i<N; i++){ # loop 1A
1502 x = q[i]; # DR_MISALIGNMENT(q) = 3
1503 p[i] = y; # DR_MISALIGNMENT(p) = 0
1506 else {
1507 for (i=0; i<N; i++){ # loop 1B
1508 x = q[i]; # DR_MISALIGNMENT(q) = 3
1509 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1513 -- Possibility 2: we do loop peeling:
1514 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1515 x = q[i];
1516 p[i] = y;
1518 for (i = 3; i < N; i++){ # loop 2A
1519 x = q[i]; # DR_MISALIGNMENT(q) = 0
1520 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1523 -- Possibility 3: combination of loop peeling and versioning:
1524 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1525 x = q[i];
1526 p[i] = y;
1528 if (p is aligned) {
1529 for (i = 3; i<N; i++){ # loop 3A
1530 x = q[i]; # DR_MISALIGNMENT(q) = 0
1531 p[i] = y; # DR_MISALIGNMENT(p) = 0
1534 else {
1535 for (i = 3; i<N; i++){ # loop 3B
1536 x = q[i]; # DR_MISALIGNMENT(q) = 0
1537 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1541 These loops are later passed to loop_transform to be vectorized. The
1542 vectorizer will use the alignment information to guide the transformation
1543 (whether to generate regular loads/stores, or with special handling for
1544 misalignment). */
1546 bool
1547 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1549 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1550 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1551 enum dr_alignment_support supportable_dr_alignment;
1552 struct data_reference *dr0 = NULL, *first_store = NULL;
1553 struct data_reference *dr;
1554 unsigned int i, j;
1555 bool do_peeling = false;
1556 bool do_versioning = false;
1557 bool stat;
1558 gimple stmt;
1559 stmt_vec_info stmt_info;
1560 int vect_versioning_for_alias_required;
1561 unsigned int npeel = 0;
1562 bool all_misalignments_unknown = true;
1563 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1564 unsigned possible_npeel_number = 1;
1565 tree vectype;
1566 unsigned int nelements, mis, same_align_drs_max = 0;
1567 stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost();
1569 if (dump_enabled_p ())
1570 dump_printf_loc (MSG_NOTE, vect_location,
1571 "=== vect_enhance_data_refs_alignment ===");
1573 /* While cost model enhancements are expected in the future, the high level
1574 view of the code at this time is as follows:
1576 A) If there is a misaligned access then see if peeling to align
1577 this access can make all data references satisfy
1578 vect_supportable_dr_alignment. If so, update data structures
1579 as needed and return true.
1581 B) If peeling wasn't possible and there is a data reference with an
1582 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1583 then see if loop versioning checks can be used to make all data
1584 references satisfy vect_supportable_dr_alignment. If so, update
1585 data structures as needed and return true.
1587 C) If neither peeling nor versioning were successful then return false if
1588 any data reference does not satisfy vect_supportable_dr_alignment.
1590 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1592 Note, Possibility 3 above (which is peeling and versioning together) is not
1593 being done at this time. */
1595 /* (1) Peeling to force alignment. */
1597 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1598 Considerations:
1599 + How many accesses will become aligned due to the peeling
1600 - How many accesses will become unaligned due to the peeling,
1601 and the cost of misaligned accesses.
1602 - The cost of peeling (the extra runtime checks, the increase
1603 in code size). */
1605 FOR_EACH_VEC_ELT (datarefs, i, dr)
1607 stmt = DR_STMT (dr);
1608 stmt_info = vinfo_for_stmt (stmt);
1610 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1611 continue;
1613 /* For interleaving, only the alignment of the first access
1614 matters. */
1615 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1616 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1617 continue;
1619 /* FORNOW: Any strided load prevents peeling. The induction
1620 variable analysis will fail when the prologue loop is generated,
1621 and so we can't generate the new base for the pointer. */
1622 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1624 if (dump_enabled_p ())
1625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1626 "strided load prevents peeling");
1627 do_peeling = false;
1628 break;
1631 /* For invariant accesses there is nothing to enhance. */
1632 if (integer_zerop (DR_STEP (dr)))
1633 continue;
1635 /* Strided loads perform only component accesses, alignment is
1636 irrelevant for them. */
1637 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1638 continue;
1640 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1641 do_peeling = vector_alignment_reachable_p (dr);
1642 if (do_peeling)
1644 if (known_alignment_for_access_p (dr))
1646 unsigned int npeel_tmp;
1647 bool negative = tree_int_cst_compare (DR_STEP (dr),
1648 size_zero_node) < 0;
1650 /* Save info about DR in the hash table. */
1651 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1652 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1653 htab_create (1, vect_peeling_hash,
1654 vect_peeling_hash_eq, free);
1656 vectype = STMT_VINFO_VECTYPE (stmt_info);
1657 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1658 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1659 TREE_TYPE (DR_REF (dr))));
1660 npeel_tmp = (negative
1661 ? (mis - nelements) : (nelements - mis))
1662 & (nelements - 1);
1664 /* For multiple types, it is possible that the bigger type access
1665 will have more than one peeling option. E.g., a loop with two
1666 types: one of size (vector size / 4), and the other one of
1667 size (vector size / 8). Vectorization factor will 8. If both
1668 access are misaligned by 3, the first one needs one scalar
1669 iteration to be aligned, and the second one needs 5. But the
1670 the first one will be aligned also by peeling 5 scalar
1671 iterations, and in that case both accesses will be aligned.
1672 Hence, except for the immediate peeling amount, we also want
1673 to try to add full vector size, while we don't exceed
1674 vectorization factor.
1675 We do this automtically for cost model, since we calculate cost
1676 for every peeling option. */
1677 if (!flag_vect_cost_model)
1678 possible_npeel_number = vf /nelements;
1680 /* Handle the aligned case. We may decide to align some other
1681 access, making DR unaligned. */
1682 if (DR_MISALIGNMENT (dr) == 0)
1684 npeel_tmp = 0;
1685 if (!flag_vect_cost_model)
1686 possible_npeel_number++;
1689 for (j = 0; j < possible_npeel_number; j++)
1691 gcc_assert (npeel_tmp <= vf);
1692 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1693 npeel_tmp += nelements;
1696 all_misalignments_unknown = false;
1697 /* Data-ref that was chosen for the case that all the
1698 misalignments are unknown is not relevant anymore, since we
1699 have a data-ref with known alignment. */
1700 dr0 = NULL;
1702 else
1704 /* If we don't know all the misalignment values, we prefer
1705 peeling for data-ref that has maximum number of data-refs
1706 with the same alignment, unless the target prefers to align
1707 stores over load. */
1708 if (all_misalignments_unknown)
1710 if (same_align_drs_max
1711 < STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ()
1712 || !dr0)
1714 same_align_drs_max
1715 = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
1716 dr0 = dr;
1719 if (!first_store && DR_IS_WRITE (dr))
1720 first_store = dr;
1723 /* If there are both known and unknown misaligned accesses in the
1724 loop, we choose peeling amount according to the known
1725 accesses. */
1728 if (!supportable_dr_alignment)
1730 dr0 = dr;
1731 if (!first_store && DR_IS_WRITE (dr))
1732 first_store = dr;
1736 else
1738 if (!aligned_access_p (dr))
1740 if (dump_enabled_p ())
1741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1742 "vector alignment may not be reachable");
1743 break;
1748 vect_versioning_for_alias_required
1749 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1751 /* Temporarily, if versioning for alias is required, we disable peeling
1752 until we support peeling and versioning. Often peeling for alignment
1753 will require peeling for loop-bound, which in turn requires that we
1754 know how to adjust the loop ivs after the loop. */
1755 if (vect_versioning_for_alias_required
1756 || !vect_can_advance_ivs_p (loop_vinfo)
1757 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1758 do_peeling = false;
1760 if (do_peeling && all_misalignments_unknown
1761 && vect_supportable_dr_alignment (dr0, false))
1764 /* Check if the target requires to prefer stores over loads, i.e., if
1765 misaligned stores are more expensive than misaligned loads (taking
1766 drs with same alignment into account). */
1767 if (first_store && DR_IS_READ (dr0))
1769 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1770 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1771 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1772 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1773 stmt_vector_for_cost dummy;
1774 dummy.create (2);
1776 vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1777 &dummy);
1778 vect_get_data_access_cost (first_store, &store_inside_cost,
1779 &store_outside_cost, &dummy);
1781 dummy.release ();
1783 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1784 aligning the load DR0). */
1785 load_inside_penalty = store_inside_cost;
1786 load_outside_penalty = store_outside_cost;
1787 for (i = 0;
1788 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1789 DR_STMT (first_store))).iterate (i, &dr);
1790 i++)
1791 if (DR_IS_READ (dr))
1793 load_inside_penalty += load_inside_cost;
1794 load_outside_penalty += load_outside_cost;
1796 else
1798 load_inside_penalty += store_inside_cost;
1799 load_outside_penalty += store_outside_cost;
1802 /* Calculate the penalty for leaving DR0 unaligned (by
1803 aligning the FIRST_STORE). */
1804 store_inside_penalty = load_inside_cost;
1805 store_outside_penalty = load_outside_cost;
1806 for (i = 0;
1807 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1808 DR_STMT (dr0))).iterate (i, &dr);
1809 i++)
1810 if (DR_IS_READ (dr))
1812 store_inside_penalty += load_inside_cost;
1813 store_outside_penalty += load_outside_cost;
1815 else
1817 store_inside_penalty += store_inside_cost;
1818 store_outside_penalty += store_outside_cost;
1821 if (load_inside_penalty > store_inside_penalty
1822 || (load_inside_penalty == store_inside_penalty
1823 && load_outside_penalty > store_outside_penalty))
1824 dr0 = first_store;
1827 /* In case there are only loads with different unknown misalignments, use
1828 peeling only if it may help to align other accesses in the loop. */
1829 if (!first_store
1830 && !STMT_VINFO_SAME_ALIGN_REFS (
1831 vinfo_for_stmt (DR_STMT (dr0))).length ()
1832 && vect_supportable_dr_alignment (dr0, false)
1833 != dr_unaligned_supported)
1834 do_peeling = false;
1837 if (do_peeling && !dr0)
1839 /* Peeling is possible, but there is no data access that is not supported
1840 unless aligned. So we try to choose the best possible peeling. */
1842 /* We should get here only if there are drs with known misalignment. */
1843 gcc_assert (!all_misalignments_unknown);
1845 /* Choose the best peeling from the hash table. */
1846 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
1847 &body_cost_vec);
1848 if (!dr0 || !npeel)
1849 do_peeling = false;
1852 if (do_peeling)
1854 stmt = DR_STMT (dr0);
1855 stmt_info = vinfo_for_stmt (stmt);
1856 vectype = STMT_VINFO_VECTYPE (stmt_info);
1857 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1859 if (known_alignment_for_access_p (dr0))
1861 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1862 size_zero_node) < 0;
1863 if (!npeel)
1865 /* Since it's known at compile time, compute the number of
1866 iterations in the peeled loop (the peeling factor) for use in
1867 updating DR_MISALIGNMENT values. The peeling factor is the
1868 vectorization factor minus the misalignment as an element
1869 count. */
1870 mis = DR_MISALIGNMENT (dr0);
1871 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1872 npeel = ((negative ? mis - nelements : nelements - mis)
1873 & (nelements - 1));
1876 /* For interleaved data access every iteration accesses all the
1877 members of the group, therefore we divide the number of iterations
1878 by the group size. */
1879 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1880 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1881 npeel /= GROUP_SIZE (stmt_info);
1883 if (dump_enabled_p ())
1884 dump_printf_loc (MSG_NOTE, vect_location,
1885 "Try peeling by %d", npeel);
1888 /* Ensure that all data refs can be vectorized after the peel. */
1889 FOR_EACH_VEC_ELT (datarefs, i, dr)
1891 int save_misalignment;
1893 if (dr == dr0)
1894 continue;
1896 stmt = DR_STMT (dr);
1897 stmt_info = vinfo_for_stmt (stmt);
1898 /* For interleaving, only the alignment of the first access
1899 matters. */
1900 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1901 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1902 continue;
1904 /* Strided loads perform only component accesses, alignment is
1905 irrelevant for them. */
1906 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1907 continue;
1909 save_misalignment = DR_MISALIGNMENT (dr);
1910 vect_update_misalignment_for_peel (dr, dr0, npeel);
1911 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1912 SET_DR_MISALIGNMENT (dr, save_misalignment);
1914 if (!supportable_dr_alignment)
1916 do_peeling = false;
1917 break;
1921 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1923 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1924 if (!stat)
1925 do_peeling = false;
1926 else
1928 body_cost_vec.release ();
1929 return stat;
1933 if (do_peeling)
1935 stmt_info_for_cost *si;
1936 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
1938 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1939 If the misalignment of DR_i is identical to that of dr0 then set
1940 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1941 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1942 by the peeling factor times the element size of DR_i (MOD the
1943 vectorization factor times the size). Otherwise, the
1944 misalignment of DR_i must be set to unknown. */
1945 FOR_EACH_VEC_ELT (datarefs, i, dr)
1946 if (dr != dr0)
1947 vect_update_misalignment_for_peel (dr, dr0, npeel);
1949 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1950 if (npeel)
1951 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1952 else
1953 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1954 SET_DR_MISALIGNMENT (dr0, 0);
1955 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE, vect_location,
1958 "Alignment of access forced using peeling.");
1959 dump_printf_loc (MSG_NOTE, vect_location,
1960 "Peeling for alignment will be applied.");
1962 /* We've delayed passing the inside-loop peeling costs to the
1963 target cost model until we were sure peeling would happen.
1964 Do so now. */
1965 if (body_cost_vec.exists ())
1967 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1969 struct _stmt_vec_info *stmt_info
1970 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1971 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1972 si->misalign, vect_body);
1974 body_cost_vec.release ();
1977 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1978 gcc_assert (stat);
1979 return stat;
1983 body_cost_vec.release ();
1985 /* (2) Versioning to force alignment. */
1987 /* Try versioning if:
1988 1) flag_tree_vect_loop_version is TRUE
1989 2) optimize loop for speed
1990 3) there is at least one unsupported misaligned data ref with an unknown
1991 misalignment, and
1992 4) all misaligned data refs with a known misalignment are supported, and
1993 5) the number of runtime alignment checks is within reason. */
1995 do_versioning =
1996 flag_tree_vect_loop_version
1997 && optimize_loop_nest_for_speed_p (loop)
1998 && (!loop->inner); /* FORNOW */
2000 if (do_versioning)
2002 FOR_EACH_VEC_ELT (datarefs, i, dr)
2004 stmt = DR_STMT (dr);
2005 stmt_info = vinfo_for_stmt (stmt);
2007 /* For interleaving, only the alignment of the first access
2008 matters. */
2009 if (aligned_access_p (dr)
2010 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
2011 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
2012 continue;
2014 /* Strided loads perform only component accesses, alignment is
2015 irrelevant for them. */
2016 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
2017 continue;
2019 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
2021 if (!supportable_dr_alignment)
2023 gimple stmt;
2024 int mask;
2025 tree vectype;
2027 if (known_alignment_for_access_p (dr)
2028 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
2029 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2031 do_versioning = false;
2032 break;
2035 stmt = DR_STMT (dr);
2036 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2037 gcc_assert (vectype);
2039 /* The rightmost bits of an aligned address must be zeros.
2040 Construct the mask needed for this test. For example,
2041 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2042 mask must be 15 = 0xf. */
2043 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2045 /* FORNOW: use the same mask to test all potentially unaligned
2046 references in the loop. The vectorizer currently supports
2047 a single vector size, see the reference to
2048 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2049 vectorization factor is computed. */
2050 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2051 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2052 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2053 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
2054 DR_STMT (dr));
2058 /* Versioning requires at least one misaligned data reference. */
2059 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2060 do_versioning = false;
2061 else if (!do_versioning)
2062 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
2065 if (do_versioning)
2067 vec<gimple> may_misalign_stmts
2068 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2069 gimple stmt;
2071 /* It can now be assumed that the data references in the statements
2072 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2073 of the loop being vectorized. */
2074 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
2076 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2077 dr = STMT_VINFO_DATA_REF (stmt_info);
2078 SET_DR_MISALIGNMENT (dr, 0);
2079 if (dump_enabled_p ())
2080 dump_printf_loc (MSG_NOTE, vect_location,
2081 "Alignment of access forced using versioning.");
2084 if (dump_enabled_p ())
2085 dump_printf_loc (MSG_NOTE, vect_location,
2086 "Versioning for alignment will be applied.");
2088 /* Peeling and versioning can't be done together at this time. */
2089 gcc_assert (! (do_peeling && do_versioning));
2091 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2092 gcc_assert (stat);
2093 return stat;
2096 /* This point is reached if neither peeling nor versioning is being done. */
2097 gcc_assert (! (do_peeling || do_versioning));
2099 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2100 return stat;
2104 /* Function vect_find_same_alignment_drs.
2106 Update group and alignment relations according to the chosen
2107 vectorization factor. */
2109 static void
2110 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
2111 loop_vec_info loop_vinfo)
2113 unsigned int i;
2114 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2115 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2116 struct data_reference *dra = DDR_A (ddr);
2117 struct data_reference *drb = DDR_B (ddr);
2118 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2119 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2120 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
2121 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
2122 lambda_vector dist_v;
2123 unsigned int loop_depth;
2125 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2126 return;
2128 if (dra == drb)
2129 return;
2131 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2132 return;
2134 /* Loop-based vectorization and known data dependence. */
2135 if (DDR_NUM_DIST_VECTS (ddr) == 0)
2136 return;
2138 /* Data-dependence analysis reports a distance vector of zero
2139 for data-references that overlap only in the first iteration
2140 but have different sign step (see PR45764).
2141 So as a sanity check require equal DR_STEP. */
2142 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2143 return;
2145 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
2146 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
2148 int dist = dist_v[loop_depth];
2150 if (dump_enabled_p ())
2151 dump_printf_loc (MSG_NOTE, vect_location,
2152 "dependence distance = %d.", dist);
2154 /* Same loop iteration. */
2155 if (dist == 0
2156 || (dist % vectorization_factor == 0 && dra_size == drb_size))
2158 /* Two references with distance zero have the same alignment. */
2159 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
2160 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
2161 if (dump_enabled_p ())
2163 dump_printf_loc (MSG_NOTE, vect_location,
2164 "accesses have the same alignment.");
2165 dump_printf (MSG_NOTE,
2166 "dependence distance modulo vf == 0 between ");
2167 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2168 dump_printf (MSG_NOTE, " and ");
2169 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
2176 /* Function vect_analyze_data_refs_alignment
2178 Analyze the alignment of the data-references in the loop.
2179 Return FALSE if a data reference is found that cannot be vectorized. */
2181 bool
2182 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2183 bb_vec_info bb_vinfo)
2185 if (dump_enabled_p ())
2186 dump_printf_loc (MSG_NOTE, vect_location,
2187 "=== vect_analyze_data_refs_alignment ===");
2189 /* Mark groups of data references with same alignment using
2190 data dependence information. */
2191 if (loop_vinfo)
2193 vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2194 struct data_dependence_relation *ddr;
2195 unsigned int i;
2197 FOR_EACH_VEC_ELT (ddrs, i, ddr)
2198 vect_find_same_alignment_drs (ddr, loop_vinfo);
2201 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2203 if (dump_enabled_p ())
2204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2205 "not vectorized: can't calculate alignment "
2206 "for data ref.");
2207 return false;
2210 return true;
2214 /* Analyze groups of accesses: check that DR belongs to a group of
2215 accesses of legal size, step, etc. Detect gaps, single element
2216 interleaving, and other special cases. Set grouped access info.
2217 Collect groups of strided stores for further use in SLP analysis. */
2219 static bool
2220 vect_analyze_group_access (struct data_reference *dr)
2222 tree step = DR_STEP (dr);
2223 tree scalar_type = TREE_TYPE (DR_REF (dr));
2224 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2225 gimple stmt = DR_STMT (dr);
2226 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2228 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2229 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2230 HOST_WIDE_INT groupsize, last_accessed_element = 1;
2231 bool slp_impossible = false;
2232 struct loop *loop = NULL;
2234 if (loop_vinfo)
2235 loop = LOOP_VINFO_LOOP (loop_vinfo);
2237 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2238 size of the interleaving group (including gaps). */
2239 groupsize = dr_step / type_size;
2241 /* Not consecutive access is possible only if it is a part of interleaving. */
2242 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2244 /* Check if it this DR is a part of interleaving, and is a single
2245 element of the group that is accessed in the loop. */
2247 /* Gaps are supported only for loads. STEP must be a multiple of the type
2248 size. The size of the group must be a power of 2. */
2249 if (DR_IS_READ (dr)
2250 && (dr_step % type_size) == 0
2251 && groupsize > 0
2252 && exact_log2 (groupsize) != -1)
2254 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2255 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2256 if (dump_enabled_p ())
2258 dump_printf_loc (MSG_NOTE, vect_location,
2259 "Detected single element interleaving ");
2260 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2261 dump_printf (MSG_NOTE, " step ");
2262 dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2265 if (loop_vinfo)
2267 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_NOTE, vect_location,
2269 "Data access with gaps requires scalar "
2270 "epilogue loop");
2271 if (loop->inner)
2273 if (dump_enabled_p ())
2274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2275 "Peeling for outer loop is not"
2276 " supported");
2277 return false;
2280 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2283 return true;
2286 if (dump_enabled_p ())
2288 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2289 "not consecutive access ");
2290 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2293 if (bb_vinfo)
2295 /* Mark the statement as unvectorizable. */
2296 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2297 return true;
2300 return false;
2303 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2305 /* First stmt in the interleaving chain. Check the chain. */
2306 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2307 struct data_reference *data_ref = dr;
2308 unsigned int count = 1;
2309 tree next_step;
2310 tree prev_init = DR_INIT (data_ref);
2311 gimple prev = stmt;
2312 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2314 while (next)
2316 /* Skip same data-refs. In case that two or more stmts share
2317 data-ref (supported only for loads), we vectorize only the first
2318 stmt, and the rest get their vectorized loads from the first
2319 one. */
2320 if (!tree_int_cst_compare (DR_INIT (data_ref),
2321 DR_INIT (STMT_VINFO_DATA_REF (
2322 vinfo_for_stmt (next)))))
2324 if (DR_IS_WRITE (data_ref))
2326 if (dump_enabled_p ())
2327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2328 "Two store stmts share the same dr.");
2329 return false;
2332 /* Check that there is no load-store dependencies for this loads
2333 to prevent a case of load-store-load to the same location. */
2334 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2335 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2337 if (dump_enabled_p ())
2338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2339 "READ_WRITE dependence in interleaving.");
2340 return false;
2343 /* For load use the same data-ref load. */
2344 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2346 prev = next;
2347 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2348 continue;
2351 prev = next;
2353 /* Check that all the accesses have the same STEP. */
2354 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2355 if (tree_int_cst_compare (step, next_step))
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2359 "not consecutive access in interleaving");
2360 return false;
2363 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2364 /* Check that the distance between two accesses is equal to the type
2365 size. Otherwise, we have gaps. */
2366 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2367 - TREE_INT_CST_LOW (prev_init)) / type_size;
2368 if (diff != 1)
2370 /* FORNOW: SLP of accesses with gaps is not supported. */
2371 slp_impossible = true;
2372 if (DR_IS_WRITE (data_ref))
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2376 "interleaved store with gaps");
2377 return false;
2380 gaps += diff - 1;
2383 last_accessed_element += diff;
2385 /* Store the gap from the previous member of the group. If there is no
2386 gap in the access, GROUP_GAP is always 1. */
2387 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2389 prev_init = DR_INIT (data_ref);
2390 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2391 /* Count the number of data-refs in the chain. */
2392 count++;
2395 /* COUNT is the number of accesses found, we multiply it by the size of
2396 the type to get COUNT_IN_BYTES. */
2397 count_in_bytes = type_size * count;
2399 /* Check that the size of the interleaving (including gaps) is not
2400 greater than STEP. */
2401 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2403 if (dump_enabled_p ())
2405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2406 "interleaving size is greater than step for ");
2407 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dr));
2409 return false;
2412 /* Check that the size of the interleaving is equal to STEP for stores,
2413 i.e., that there are no gaps. */
2414 if (dr_step && dr_step != count_in_bytes)
2416 if (DR_IS_READ (dr))
2418 slp_impossible = true;
2419 /* There is a gap after the last load in the group. This gap is a
2420 difference between the groupsize and the number of elements.
2421 When there is no gap, this difference should be 0. */
2422 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
2424 else
2426 if (dump_enabled_p ())
2427 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2428 "interleaved store with gaps");
2429 return false;
2433 /* Check that STEP is a multiple of type size. */
2434 if (dr_step && (dr_step % type_size) != 0)
2436 if (dump_enabled_p ())
2438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2439 "step is not a multiple of type size: step ");
2440 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
2441 dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
2442 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2443 TYPE_SIZE_UNIT (scalar_type));
2445 return false;
2448 if (groupsize == 0)
2449 groupsize = count;
2451 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2452 if (dump_enabled_p ())
2453 dump_printf_loc (MSG_NOTE, vect_location,
2454 "Detected interleaving of size %d", (int)groupsize);
2456 /* SLP: create an SLP data structure for every interleaving group of
2457 stores for further analysis in vect_analyse_slp. */
2458 if (DR_IS_WRITE (dr) && !slp_impossible)
2460 if (loop_vinfo)
2461 LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
2462 if (bb_vinfo)
2463 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
2466 /* There is a gap in the end of the group. */
2467 if (groupsize - last_accessed_element > 0 && loop_vinfo)
2469 if (dump_enabled_p ())
2470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2471 "Data access with gaps requires scalar "
2472 "epilogue loop");
2473 if (loop->inner)
2475 if (dump_enabled_p ())
2476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2477 "Peeling for outer loop is not supported");
2478 return false;
2481 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2485 return true;
2489 /* Analyze the access pattern of the data-reference DR.
2490 In case of non-consecutive accesses call vect_analyze_group_access() to
2491 analyze groups of accesses. */
2493 static bool
2494 vect_analyze_data_ref_access (struct data_reference *dr)
2496 tree step = DR_STEP (dr);
2497 tree scalar_type = TREE_TYPE (DR_REF (dr));
2498 gimple stmt = DR_STMT (dr);
2499 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2500 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2501 struct loop *loop = NULL;
2503 if (loop_vinfo)
2504 loop = LOOP_VINFO_LOOP (loop_vinfo);
2506 if (loop_vinfo && !step)
2508 if (dump_enabled_p ())
2509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2510 "bad data-ref access in loop");
2511 return false;
2514 /* Allow invariant loads in loops. */
2515 if (loop_vinfo && integer_zerop (step))
2517 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2518 return DR_IS_READ (dr);
2521 if (loop && nested_in_vect_loop_p (loop, stmt))
2523 /* Interleaved accesses are not yet supported within outer-loop
2524 vectorization for references in the inner-loop. */
2525 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2527 /* For the rest of the analysis we use the outer-loop step. */
2528 step = STMT_VINFO_DR_STEP (stmt_info);
2529 if (integer_zerop (step))
2531 if (dump_enabled_p ())
2532 dump_printf_loc (MSG_NOTE, vect_location,
2533 "zero step in outer loop.");
2534 if (DR_IS_READ (dr))
2535 return true;
2536 else
2537 return false;
2541 /* Consecutive? */
2542 if (TREE_CODE (step) == INTEGER_CST)
2544 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2545 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2546 || (dr_step < 0
2547 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2549 /* Mark that it is not interleaving. */
2550 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2551 return true;
2555 if (loop && nested_in_vect_loop_p (loop, stmt))
2557 if (dump_enabled_p ())
2558 dump_printf_loc (MSG_NOTE, vect_location,
2559 "grouped access in outer loop.");
2560 return false;
2563 /* Assume this is a DR handled by non-constant strided load case. */
2564 if (TREE_CODE (step) != INTEGER_CST)
2565 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2567 /* Not consecutive access - check if it's a part of interleaving group. */
2568 return vect_analyze_group_access (dr);
2572 /* Function vect_analyze_data_ref_accesses.
2574 Analyze the access pattern of all the data references in the loop.
2576 FORNOW: the only access pattern that is considered vectorizable is a
2577 simple step 1 (consecutive) access.
2579 FORNOW: handle only arrays and pointer accesses. */
2581 bool
2582 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2584 unsigned int i;
2585 vec<data_reference_p> datarefs;
2586 struct data_reference *dr;
2588 if (dump_enabled_p ())
2589 dump_printf_loc (MSG_NOTE, vect_location,
2590 "=== vect_analyze_data_ref_accesses ===");
2592 if (loop_vinfo)
2593 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2594 else
2595 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2597 FOR_EACH_VEC_ELT (datarefs, i, dr)
2598 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2599 && !vect_analyze_data_ref_access (dr))
2601 if (dump_enabled_p ())
2602 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2603 "not vectorized: complicated access pattern.");
2605 if (bb_vinfo)
2607 /* Mark the statement as not vectorizable. */
2608 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2609 continue;
2611 else
2612 return false;
2615 return true;
2618 /* Function vect_prune_runtime_alias_test_list.
2620 Prune a list of ddrs to be tested at run-time by versioning for alias.
2621 Return FALSE if resulting list of ddrs is longer then allowed by
2622 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2624 bool
2625 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2627 vec<ddr_p> ddrs =
2628 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2629 unsigned i, j;
2631 if (dump_enabled_p ())
2632 dump_printf_loc (MSG_NOTE, vect_location,
2633 "=== vect_prune_runtime_alias_test_list ===");
2635 for (i = 0; i < ddrs.length (); )
2637 bool found;
2638 ddr_p ddr_i;
2640 ddr_i = ddrs[i];
2641 found = false;
2643 for (j = 0; j < i; j++)
2645 ddr_p ddr_j = ddrs[j];
2647 if (vect_vfa_range_equal (ddr_i, ddr_j))
2649 if (dump_enabled_p ())
2651 dump_printf_loc (MSG_NOTE, vect_location,
2652 "found equal ranges ");
2653 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr_i)));
2654 dump_printf (MSG_NOTE, ", ");
2655 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr_i)));
2656 dump_printf (MSG_NOTE, " and ");
2657 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr_j)));
2658 dump_printf (MSG_NOTE, ", ");
2659 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr_j)));
2661 found = true;
2662 break;
2666 if (found)
2668 ddrs.ordered_remove (i);
2669 continue;
2671 i++;
2674 if (ddrs.length () >
2675 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2677 if (dump_enabled_p ())
2679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2680 "disable versioning for alias - max number of "
2681 "generated checks exceeded.");
2684 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).truncate (0);
2686 return false;
2689 return true;
2692 /* Check whether a non-affine read in stmt is suitable for gather load
2693 and if so, return a builtin decl for that operation. */
2695 tree
2696 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2697 tree *offp, int *scalep)
2699 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2700 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2701 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2702 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2703 tree offtype = NULL_TREE;
2704 tree decl, base, off;
2705 enum machine_mode pmode;
2706 int punsignedp, pvolatilep;
2708 /* The gather builtins need address of the form
2709 loop_invariant + vector * {1, 2, 4, 8}
2711 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2712 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2713 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2714 multiplications and additions in it. To get a vector, we need
2715 a single SSA_NAME that will be defined in the loop and will
2716 contain everything that is not loop invariant and that can be
2717 vectorized. The following code attempts to find such a preexistng
2718 SSA_NAME OFF and put the loop invariants into a tree BASE
2719 that can be gimplified before the loop. */
2720 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2721 &pmode, &punsignedp, &pvolatilep, false);
2722 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2724 if (TREE_CODE (base) == MEM_REF)
2726 if (!integer_zerop (TREE_OPERAND (base, 1)))
2728 if (off == NULL_TREE)
2730 double_int moff = mem_ref_offset (base);
2731 off = double_int_to_tree (sizetype, moff);
2733 else
2734 off = size_binop (PLUS_EXPR, off,
2735 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2737 base = TREE_OPERAND (base, 0);
2739 else
2740 base = build_fold_addr_expr (base);
2742 if (off == NULL_TREE)
2743 off = size_zero_node;
2745 /* If base is not loop invariant, either off is 0, then we start with just
2746 the constant offset in the loop invariant BASE and continue with base
2747 as OFF, otherwise give up.
2748 We could handle that case by gimplifying the addition of base + off
2749 into some SSA_NAME and use that as off, but for now punt. */
2750 if (!expr_invariant_in_loop_p (loop, base))
2752 if (!integer_zerop (off))
2753 return NULL_TREE;
2754 off = base;
2755 base = size_int (pbitpos / BITS_PER_UNIT);
2757 /* Otherwise put base + constant offset into the loop invariant BASE
2758 and continue with OFF. */
2759 else
2761 base = fold_convert (sizetype, base);
2762 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2765 /* OFF at this point may be either a SSA_NAME or some tree expression
2766 from get_inner_reference. Try to peel off loop invariants from it
2767 into BASE as long as possible. */
2768 STRIP_NOPS (off);
2769 while (offtype == NULL_TREE)
2771 enum tree_code code;
2772 tree op0, op1, add = NULL_TREE;
2774 if (TREE_CODE (off) == SSA_NAME)
2776 gimple def_stmt = SSA_NAME_DEF_STMT (off);
2778 if (expr_invariant_in_loop_p (loop, off))
2779 return NULL_TREE;
2781 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2782 break;
2784 op0 = gimple_assign_rhs1 (def_stmt);
2785 code = gimple_assign_rhs_code (def_stmt);
2786 op1 = gimple_assign_rhs2 (def_stmt);
2788 else
2790 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2791 return NULL_TREE;
2792 code = TREE_CODE (off);
2793 extract_ops_from_tree (off, &code, &op0, &op1);
2795 switch (code)
2797 case POINTER_PLUS_EXPR:
2798 case PLUS_EXPR:
2799 if (expr_invariant_in_loop_p (loop, op0))
2801 add = op0;
2802 off = op1;
2803 do_add:
2804 add = fold_convert (sizetype, add);
2805 if (scale != 1)
2806 add = size_binop (MULT_EXPR, add, size_int (scale));
2807 base = size_binop (PLUS_EXPR, base, add);
2808 continue;
2810 if (expr_invariant_in_loop_p (loop, op1))
2812 add = op1;
2813 off = op0;
2814 goto do_add;
2816 break;
2817 case MINUS_EXPR:
2818 if (expr_invariant_in_loop_p (loop, op1))
2820 add = fold_convert (sizetype, op1);
2821 add = size_binop (MINUS_EXPR, size_zero_node, add);
2822 off = op0;
2823 goto do_add;
2825 break;
2826 case MULT_EXPR:
2827 if (scale == 1 && host_integerp (op1, 0))
2829 scale = tree_low_cst (op1, 0);
2830 off = op0;
2831 continue;
2833 break;
2834 case SSA_NAME:
2835 off = op0;
2836 continue;
2837 CASE_CONVERT:
2838 if (!POINTER_TYPE_P (TREE_TYPE (op0))
2839 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2840 break;
2841 if (TYPE_PRECISION (TREE_TYPE (op0))
2842 == TYPE_PRECISION (TREE_TYPE (off)))
2844 off = op0;
2845 continue;
2847 if (TYPE_PRECISION (TREE_TYPE (op0))
2848 < TYPE_PRECISION (TREE_TYPE (off)))
2850 off = op0;
2851 offtype = TREE_TYPE (off);
2852 STRIP_NOPS (off);
2853 continue;
2855 break;
2856 default:
2857 break;
2859 break;
2862 /* If at the end OFF still isn't a SSA_NAME or isn't
2863 defined in the loop, punt. */
2864 if (TREE_CODE (off) != SSA_NAME
2865 || expr_invariant_in_loop_p (loop, off))
2866 return NULL_TREE;
2868 if (offtype == NULL_TREE)
2869 offtype = TREE_TYPE (off);
2871 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2872 offtype, scale);
2873 if (decl == NULL_TREE)
2874 return NULL_TREE;
2876 if (basep)
2877 *basep = base;
2878 if (offp)
2879 *offp = off;
2880 if (scalep)
2881 *scalep = scale;
2882 return decl;
2885 /* Check wether a non-affine load in STMT (being in the loop referred to
2886 in LOOP_VINFO) is suitable for handling as strided load. That is the case
2887 if its address is a simple induction variable. If so return the base
2888 of that induction variable in *BASEP and the (loop-invariant) step
2889 in *STEPP, both only when that pointer is non-zero.
2891 This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
2892 base pointer) only. */
2894 bool
2895 vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2896 tree *stepp)
2898 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2899 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2900 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2901 tree base, off;
2902 affine_iv iv;
2904 if (!DR_IS_READ (dr))
2905 return false;
2907 base = DR_REF (dr);
2909 if (TREE_CODE (base) == ARRAY_REF)
2911 off = TREE_OPERAND (base, 1);
2912 base = TREE_OPERAND (base, 0);
2914 else if (TREE_CODE (base) == MEM_REF)
2916 off = TREE_OPERAND (base, 0);
2917 base = TREE_OPERAND (base, 1);
2919 else
2920 return false;
2922 if (TREE_CODE (off) != SSA_NAME)
2923 return false;
2925 if (!expr_invariant_in_loop_p (loop, base)
2926 || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
2927 return false;
2929 if (basep)
2930 *basep = iv.base;
2931 if (stepp)
2932 *stepp = iv.step;
2933 return true;
2936 /* Function vect_analyze_data_refs.
2938 Find all the data references in the loop or basic block.
2940 The general structure of the analysis of data refs in the vectorizer is as
2941 follows:
2942 1- vect_analyze_data_refs(loop/bb): call
2943 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2944 in the loop/bb and their dependences.
2945 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2946 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2947 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2951 bool
2952 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2953 bb_vec_info bb_vinfo,
2954 int *min_vf)
2956 struct loop *loop = NULL;
2957 basic_block bb = NULL;
2958 unsigned int i;
2959 vec<data_reference_p> datarefs;
2960 struct data_reference *dr;
2961 tree scalar_type;
2962 bool res, stop_bb_analysis = false;
2964 if (dump_enabled_p ())
2965 dump_printf_loc (MSG_NOTE, vect_location,
2966 "=== vect_analyze_data_refs ===\n");
2968 if (loop_vinfo)
2970 loop = LOOP_VINFO_LOOP (loop_vinfo);
2971 res = compute_data_dependences_for_loop
2972 (loop, true,
2973 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2974 &LOOP_VINFO_DATAREFS (loop_vinfo),
2975 &LOOP_VINFO_DDRS (loop_vinfo));
2977 if (!res)
2979 if (dump_enabled_p ())
2980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2981 "not vectorized: loop contains function calls"
2982 " or data references that cannot be analyzed");
2983 return false;
2986 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2988 else
2990 gimple_stmt_iterator gsi;
2992 bb = BB_VINFO_BB (bb_vinfo);
2993 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2995 gimple stmt = gsi_stmt (gsi);
2996 if (!find_data_references_in_stmt (NULL, stmt,
2997 &BB_VINFO_DATAREFS (bb_vinfo)))
2999 /* Mark the rest of the basic-block as unvectorizable. */
3000 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3002 stmt = gsi_stmt (gsi);
3003 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
3005 break;
3008 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
3009 &BB_VINFO_DDRS (bb_vinfo),
3010 vNULL, true))
3012 if (dump_enabled_p ())
3013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3014 "not vectorized: basic block contains function"
3015 " calls or data references that cannot be"
3016 " analyzed");
3017 return false;
3020 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
3023 /* Go through the data-refs, check that the analysis succeeded. Update
3024 pointer from stmt_vec_info struct to DR and vectype. */
3026 FOR_EACH_VEC_ELT (datarefs, i, dr)
3028 gimple stmt;
3029 stmt_vec_info stmt_info;
3030 tree base, offset, init;
3031 bool gather = false;
3032 int vf;
3034 if (!dr || !DR_REF (dr))
3036 if (dump_enabled_p ())
3037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3038 "not vectorized: unhandled data-ref ");
3039 return false;
3042 stmt = DR_STMT (dr);
3043 stmt_info = vinfo_for_stmt (stmt);
3045 if (stop_bb_analysis)
3047 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3048 continue;
3051 /* Check that analysis of the data-ref succeeded. */
3052 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3053 || !DR_STEP (dr))
3055 /* If target supports vector gather loads, see if they can't
3056 be used. */
3057 if (loop_vinfo
3058 && DR_IS_READ (dr)
3059 && !TREE_THIS_VOLATILE (DR_REF (dr))
3060 && targetm.vectorize.builtin_gather != NULL
3061 && !nested_in_vect_loop_p (loop, stmt))
3063 struct data_reference *newdr
3064 = create_data_ref (NULL, loop_containing_stmt (stmt),
3065 DR_REF (dr), stmt, true);
3066 gcc_assert (newdr != NULL && DR_REF (newdr));
3067 if (DR_BASE_ADDRESS (newdr)
3068 && DR_OFFSET (newdr)
3069 && DR_INIT (newdr)
3070 && DR_STEP (newdr)
3071 && integer_zerop (DR_STEP (newdr)))
3073 dr = newdr;
3074 gather = true;
3076 else
3077 free_data_ref (newdr);
3080 if (!gather)
3082 if (dump_enabled_p ())
3084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3085 "not vectorized: data ref analysis "
3086 "failed ");
3087 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3090 if (bb_vinfo)
3092 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3093 stop_bb_analysis = true;
3094 continue;
3097 return false;
3101 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3103 if (dump_enabled_p ())
3104 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3105 "not vectorized: base addr of dr is a "
3106 "constant");
3108 if (bb_vinfo)
3110 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3111 stop_bb_analysis = true;
3112 continue;
3115 if (gather)
3116 free_data_ref (dr);
3117 return false;
3120 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3122 if (dump_enabled_p ())
3124 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3125 "not vectorized: volatile type ");
3126 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3129 if (bb_vinfo)
3131 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3132 stop_bb_analysis = true;
3133 continue;
3136 return false;
3139 if (stmt_can_throw_internal (stmt))
3141 if (dump_enabled_p ())
3143 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3144 "not vectorized: statement can throw an "
3145 "exception ");
3146 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3149 if (bb_vinfo)
3151 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3152 stop_bb_analysis = true;
3153 continue;
3156 if (gather)
3157 free_data_ref (dr);
3158 return false;
3161 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3162 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3164 if (dump_enabled_p ())
3166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3167 "not vectorized: statement is bitfield "
3168 "access ");
3169 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3172 if (bb_vinfo)
3174 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3175 stop_bb_analysis = true;
3176 continue;
3179 if (gather)
3180 free_data_ref (dr);
3181 return false;
3184 base = unshare_expr (DR_BASE_ADDRESS (dr));
3185 offset = unshare_expr (DR_OFFSET (dr));
3186 init = unshare_expr (DR_INIT (dr));
3188 if (is_gimple_call (stmt))
3190 if (dump_enabled_p ())
3192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3193 "not vectorized: dr in a call ");
3194 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3197 if (bb_vinfo)
3199 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3200 stop_bb_analysis = true;
3201 continue;
3204 if (gather)
3205 free_data_ref (dr);
3206 return false;
3209 /* Update DR field in stmt_vec_info struct. */
3211 /* If the dataref is in an inner-loop of the loop that is considered for
3212 for vectorization, we also want to analyze the access relative to
3213 the outer-loop (DR contains information only relative to the
3214 inner-most enclosing loop). We do that by building a reference to the
3215 first location accessed by the inner-loop, and analyze it relative to
3216 the outer-loop. */
3217 if (loop && nested_in_vect_loop_p (loop, stmt))
3219 tree outer_step, outer_base, outer_init;
3220 HOST_WIDE_INT pbitsize, pbitpos;
3221 tree poffset;
3222 enum machine_mode pmode;
3223 int punsignedp, pvolatilep;
3224 affine_iv base_iv, offset_iv;
3225 tree dinit;
3227 /* Build a reference to the first location accessed by the
3228 inner-loop: *(BASE+INIT). (The first location is actually
3229 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3230 tree inner_base = build_fold_indirect_ref
3231 (fold_build_pointer_plus (base, init));
3233 if (dump_enabled_p ())
3235 dump_printf_loc (MSG_NOTE, vect_location,
3236 "analyze in outer-loop: ");
3237 dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
3240 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3241 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3242 gcc_assert (outer_base != NULL_TREE);
3244 if (pbitpos % BITS_PER_UNIT != 0)
3246 if (dump_enabled_p ())
3247 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3248 "failed: bit offset alignment.\n");
3249 return false;
3252 outer_base = build_fold_addr_expr (outer_base);
3253 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3254 &base_iv, false))
3256 if (dump_enabled_p ())
3257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3258 "failed: evolution of base is not affine.\n");
3259 return false;
3262 if (offset)
3264 if (poffset)
3265 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
3266 poffset);
3267 else
3268 poffset = offset;
3271 if (!poffset)
3273 offset_iv.base = ssize_int (0);
3274 offset_iv.step = ssize_int (0);
3276 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3277 &offset_iv, false))
3279 if (dump_enabled_p ())
3280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3281 "evolution of offset is not affine.\n");
3282 return false;
3285 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3286 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3287 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3288 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3289 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3291 outer_step = size_binop (PLUS_EXPR,
3292 fold_convert (ssizetype, base_iv.step),
3293 fold_convert (ssizetype, offset_iv.step));
3295 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3296 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3297 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3298 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3299 STMT_VINFO_DR_OFFSET (stmt_info) =
3300 fold_convert (ssizetype, offset_iv.base);
3301 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3302 size_int (highest_pow2_factor (offset_iv.base));
3304 if (dump_enabled_p ())
3306 dump_printf_loc (MSG_NOTE, vect_location,
3307 "\touter base_address: ");
3308 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3309 STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3310 dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
3311 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3312 STMT_VINFO_DR_OFFSET (stmt_info));
3313 dump_printf (MSG_NOTE,
3314 "\n\touter constant offset from base address: ");
3315 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3316 STMT_VINFO_DR_INIT (stmt_info));
3317 dump_printf (MSG_NOTE, "\n\touter step: ");
3318 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3319 STMT_VINFO_DR_STEP (stmt_info));
3320 dump_printf (MSG_NOTE, "\n\touter aligned to: ");
3321 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3322 STMT_VINFO_DR_ALIGNED_TO (stmt_info));
3326 if (STMT_VINFO_DATA_REF (stmt_info))
3328 if (dump_enabled_p ())
3330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3331 "not vectorized: more than one data ref "
3332 "in stmt: ");
3333 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3336 if (bb_vinfo)
3338 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3339 stop_bb_analysis = true;
3340 continue;
3343 if (gather)
3344 free_data_ref (dr);
3345 return false;
3348 STMT_VINFO_DATA_REF (stmt_info) = dr;
3350 /* Set vectype for STMT. */
3351 scalar_type = TREE_TYPE (DR_REF (dr));
3352 STMT_VINFO_VECTYPE (stmt_info) =
3353 get_vectype_for_scalar_type (scalar_type);
3354 if (!STMT_VINFO_VECTYPE (stmt_info))
3356 if (dump_enabled_p ())
3358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3359 "not vectorized: no vectype for stmt: ");
3360 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3361 dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
3362 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
3363 scalar_type);
3366 if (bb_vinfo)
3368 /* Mark the statement as not vectorizable. */
3369 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3370 stop_bb_analysis = true;
3371 continue;
3374 if (gather)
3376 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3377 free_data_ref (dr);
3379 return false;
3382 /* Adjust the minimal vectorization factor according to the
3383 vector type. */
3384 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3385 if (vf > *min_vf)
3386 *min_vf = vf;
3388 if (gather)
3390 unsigned int j, k, n;
3391 struct data_reference *olddr
3392 = datarefs[i];
3393 vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3394 struct data_dependence_relation *ddr, *newddr;
3395 bool bad = false;
3396 tree off;
3397 vec<loop_p> nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3399 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3400 if (gather
3401 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3402 gather = false;
3403 if (!gather)
3405 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3406 free_data_ref (dr);
3407 if (dump_enabled_p ())
3409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3410 "not vectorized: not suitable for gather "
3411 "load ");
3412 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3414 return false;
3417 n = datarefs.length () - 1;
3418 for (j = 0, k = i - 1; j < i; j++)
3420 ddr = ddrs[k];
3421 gcc_assert (DDR_B (ddr) == olddr);
3422 newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3423 nest);
3424 ddrs[k] = newddr;
3425 free_dependence_relation (ddr);
3426 if (!bad
3427 && DR_IS_WRITE (DDR_A (newddr))
3428 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3429 bad = true;
3430 k += --n;
3433 k++;
3434 n = k + datarefs.length () - i - 1;
3435 for (; k < n; k++)
3437 ddr = ddrs[k];
3438 gcc_assert (DDR_A (ddr) == olddr);
3439 newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3440 nest);
3441 ddrs[k] = newddr;
3442 free_dependence_relation (ddr);
3443 if (!bad
3444 && DR_IS_WRITE (DDR_B (newddr))
3445 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3446 bad = true;
3449 k = ddrs.length ()
3450 - datarefs.length () + i;
3451 ddr = ddrs[k];
3452 gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3453 newddr = initialize_data_dependence_relation (dr, dr, nest);
3454 ddrs[k] = newddr;
3455 free_dependence_relation (ddr);
3456 datarefs[i] = dr;
3458 if (bad)
3460 if (dump_enabled_p ())
3462 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3463 "not vectorized: data dependence conflict"
3464 " prevents gather load");
3465 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3467 return false;
3470 STMT_VINFO_GATHER_P (stmt_info) = true;
3472 else if (loop_vinfo
3473 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3475 bool strided_load = false;
3476 if (!nested_in_vect_loop_p (loop, stmt))
3477 strided_load
3478 = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
3479 if (!strided_load)
3481 if (dump_enabled_p ())
3483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3484 "not vectorized: not suitable for strided "
3485 "load ");
3486 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3488 return false;
3490 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
3494 return true;
3498 /* Function vect_get_new_vect_var.
3500 Returns a name for a new variable. The current naming scheme appends the
3501 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
3502 the name of vectorizer generated variables, and appends that to NAME if
3503 provided. */
3505 tree
3506 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3508 const char *prefix;
3509 tree new_vect_var;
3511 switch (var_kind)
3513 case vect_simple_var:
3514 prefix = "vect_";
3515 break;
3516 case vect_scalar_var:
3517 prefix = "stmp_";
3518 break;
3519 case vect_pointer_var:
3520 prefix = "vect_p";
3521 break;
3522 default:
3523 gcc_unreachable ();
3526 if (name)
3528 char* tmp = concat (prefix, name, NULL);
3529 new_vect_var = create_tmp_reg (type, tmp);
3530 free (tmp);
3532 else
3533 new_vect_var = create_tmp_reg (type, prefix);
3535 return new_vect_var;
3539 /* Function vect_create_addr_base_for_vector_ref.
3541 Create an expression that computes the address of the first memory location
3542 that will be accessed for a data reference.
3544 Input:
3545 STMT: The statement containing the data reference.
3546 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3547 OFFSET: Optional. If supplied, it is be added to the initial address.
3548 LOOP: Specify relative to which loop-nest should the address be computed.
3549 For example, when the dataref is in an inner-loop nested in an
3550 outer-loop that is now being vectorized, LOOP can be either the
3551 outer-loop, or the inner-loop. The first memory location accessed
3552 by the following dataref ('in' points to short):
3554 for (i=0; i<N; i++)
3555 for (j=0; j<M; j++)
3556 s += in[i+j]
3558 is as follows:
3559 if LOOP=i_loop: &in (relative to i_loop)
3560 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3562 Output:
3563 1. Return an SSA_NAME whose value is the address of the memory location of
3564 the first vector of the data reference.
3565 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3566 these statement(s) which define the returned SSA_NAME.
3568 FORNOW: We are only handling array accesses with step 1. */
3570 tree
3571 vect_create_addr_base_for_vector_ref (gimple stmt,
3572 gimple_seq *new_stmt_list,
3573 tree offset,
3574 struct loop *loop)
3576 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3577 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3578 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3579 tree base_name;
3580 tree data_ref_base_var;
3581 tree vec_stmt;
3582 tree addr_base, addr_expr;
3583 tree dest;
3584 gimple_seq seq = NULL;
3585 tree base_offset = unshare_expr (DR_OFFSET (dr));
3586 tree init = unshare_expr (DR_INIT (dr));
3587 tree vect_ptr_type;
3588 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3589 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3590 tree base;
3592 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3594 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3596 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3598 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3599 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3600 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3603 if (loop_vinfo)
3604 base_name = build_fold_indirect_ref (data_ref_base);
3605 else
3607 base_offset = ssize_int (0);
3608 init = ssize_int (0);
3609 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
3612 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3613 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3614 data_ref_base_var);
3615 gimple_seq_add_seq (new_stmt_list, seq);
3617 /* Create base_offset */
3618 base_offset = size_binop (PLUS_EXPR,
3619 fold_convert (sizetype, base_offset),
3620 fold_convert (sizetype, init));
3621 dest = create_tmp_var (sizetype, "base_off");
3622 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3623 gimple_seq_add_seq (new_stmt_list, seq);
3625 if (offset)
3627 tree tmp = create_tmp_var (sizetype, "offset");
3629 offset = fold_build2 (MULT_EXPR, sizetype,
3630 fold_convert (sizetype, offset), step);
3631 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3632 base_offset, offset);
3633 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3634 gimple_seq_add_seq (new_stmt_list, seq);
3637 /* base + base_offset */
3638 if (loop_vinfo)
3639 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3640 else
3642 addr_base = build1 (ADDR_EXPR,
3643 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3644 unshare_expr (DR_REF (dr)));
3647 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3648 base = get_base_address (DR_REF (dr));
3649 if (base
3650 && TREE_CODE (base) == MEM_REF)
3651 vect_ptr_type
3652 = build_qualified_type (vect_ptr_type,
3653 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3655 vec_stmt = fold_convert (vect_ptr_type, addr_base);
3656 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3657 get_name (base_name));
3658 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3659 gimple_seq_add_seq (new_stmt_list, seq);
3661 if (DR_PTR_INFO (dr)
3662 && TREE_CODE (vec_stmt) == SSA_NAME)
3664 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3665 if (offset)
3666 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (vec_stmt));
3669 if (dump_enabled_p ())
3671 dump_printf_loc (MSG_NOTE, vect_location, "created ");
3672 dump_generic_expr (MSG_NOTE, TDF_SLIM, vec_stmt);
3675 return vec_stmt;
3679 /* Function vect_create_data_ref_ptr.
3681 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3682 location accessed in the loop by STMT, along with the def-use update
3683 chain to appropriately advance the pointer through the loop iterations.
3684 Also set aliasing information for the pointer. This pointer is used by
3685 the callers to this function to create a memory reference expression for
3686 vector load/store access.
3688 Input:
3689 1. STMT: a stmt that references memory. Expected to be of the form
3690 GIMPLE_ASSIGN <name, data-ref> or
3691 GIMPLE_ASSIGN <data-ref, name>.
3692 2. AGGR_TYPE: the type of the reference, which should be either a vector
3693 or an array.
3694 3. AT_LOOP: the loop where the vector memref is to be created.
3695 4. OFFSET (optional): an offset to be added to the initial address accessed
3696 by the data-ref in STMT.
3697 5. BSI: location where the new stmts are to be placed if there is no loop
3698 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3699 pointing to the initial address.
3701 Output:
3702 1. Declare a new ptr to vector_type, and have it point to the base of the
3703 data reference (initial addressed accessed by the data reference).
3704 For example, for vector of type V8HI, the following code is generated:
3706 v8hi *ap;
3707 ap = (v8hi *)initial_address;
3709 if OFFSET is not supplied:
3710 initial_address = &a[init];
3711 if OFFSET is supplied:
3712 initial_address = &a[init + OFFSET];
3714 Return the initial_address in INITIAL_ADDRESS.
3716 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3717 update the pointer in each iteration of the loop.
3719 Return the increment stmt that updates the pointer in PTR_INCR.
3721 3. Set INV_P to true if the access pattern of the data reference in the
3722 vectorized loop is invariant. Set it to false otherwise.
3724 4. Return the pointer. */
3726 tree
3727 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3728 tree offset, tree *initial_address,
3729 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3730 bool only_init, bool *inv_p)
3732 tree base_name;
3733 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3734 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3735 struct loop *loop = NULL;
3736 bool nested_in_vect_loop = false;
3737 struct loop *containing_loop = NULL;
3738 tree aggr_ptr_type;
3739 tree aggr_ptr;
3740 tree new_temp;
3741 gimple vec_stmt;
3742 gimple_seq new_stmt_list = NULL;
3743 edge pe = NULL;
3744 basic_block new_bb;
3745 tree aggr_ptr_init;
3746 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3747 tree aptr;
3748 gimple_stmt_iterator incr_gsi;
3749 bool insert_after;
3750 bool negative;
3751 tree indx_before_incr, indx_after_incr;
3752 gimple incr;
3753 tree step;
3754 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3755 tree base;
3757 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3758 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3760 if (loop_vinfo)
3762 loop = LOOP_VINFO_LOOP (loop_vinfo);
3763 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3764 containing_loop = (gimple_bb (stmt))->loop_father;
3765 pe = loop_preheader_edge (loop);
3767 else
3769 gcc_assert (bb_vinfo);
3770 only_init = true;
3771 *ptr_incr = NULL;
3774 /* Check the step (evolution) of the load in LOOP, and record
3775 whether it's invariant. */
3776 if (nested_in_vect_loop)
3777 step = STMT_VINFO_DR_STEP (stmt_info);
3778 else
3779 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3781 if (tree_int_cst_compare (step, size_zero_node) == 0)
3782 *inv_p = true;
3783 else
3784 *inv_p = false;
3785 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3787 /* Create an expression for the first address accessed by this load
3788 in LOOP. */
3789 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3791 if (dump_enabled_p ())
3793 tree data_ref_base = base_name;
3794 dump_printf_loc (MSG_NOTE, vect_location,
3795 "create %s-pointer variable to type: ",
3796 tree_code_name[(int) TREE_CODE (aggr_type)]);
3797 dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
3798 if (TREE_CODE (data_ref_base) == VAR_DECL
3799 || TREE_CODE (data_ref_base) == ARRAY_REF)
3800 dump_printf (MSG_NOTE, " vectorizing an array ref: ");
3801 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3802 dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
3803 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3804 dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
3805 dump_generic_expr (MSG_NOTE, TDF_SLIM, base_name);
3808 /* (1) Create the new aggregate-pointer variable. */
3809 aggr_ptr_type = build_pointer_type (aggr_type);
3810 base = get_base_address (DR_REF (dr));
3811 if (base
3812 && TREE_CODE (base) == MEM_REF)
3813 aggr_ptr_type
3814 = build_qualified_type (aggr_ptr_type,
3815 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3816 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3817 get_name (base_name));
3819 /* Vector and array types inherit the alias set of their component
3820 type by default so we need to use a ref-all pointer if the data
3821 reference does not conflict with the created aggregated data
3822 reference because it is not addressable. */
3823 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3824 get_alias_set (DR_REF (dr))))
3826 aggr_ptr_type
3827 = build_pointer_type_for_mode (aggr_type,
3828 TYPE_MODE (aggr_ptr_type), true);
3829 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3830 get_name (base_name));
3833 /* Likewise for any of the data references in the stmt group. */
3834 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3836 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3839 tree lhs = gimple_assign_lhs (orig_stmt);
3840 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3841 get_alias_set (lhs)))
3843 aggr_ptr_type
3844 = build_pointer_type_for_mode (aggr_type,
3845 TYPE_MODE (aggr_ptr_type), true);
3846 aggr_ptr
3847 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3848 get_name (base_name));
3849 break;
3852 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3854 while (orig_stmt);
3857 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3858 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3859 def-use update cycles for the pointer: one relative to the outer-loop
3860 (LOOP), which is what steps (3) and (4) below do. The other is relative
3861 to the inner-loop (which is the inner-most loop containing the dataref),
3862 and this is done be step (5) below.
3864 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3865 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3866 redundant. Steps (3),(4) create the following:
3868 vp0 = &base_addr;
3869 LOOP: vp1 = phi(vp0,vp2)
3872 vp2 = vp1 + step
3873 goto LOOP
3875 If there is an inner-loop nested in loop, then step (5) will also be
3876 applied, and an additional update in the inner-loop will be created:
3878 vp0 = &base_addr;
3879 LOOP: vp1 = phi(vp0,vp2)
3881 inner: vp3 = phi(vp1,vp4)
3882 vp4 = vp3 + inner_step
3883 if () goto inner
3885 vp2 = vp1 + step
3886 if () goto LOOP */
3888 /* (2) Calculate the initial address of the aggregate-pointer, and set
3889 the aggregate-pointer to point to it before the loop. */
3891 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3893 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3894 offset, loop);
3895 if (new_stmt_list)
3897 if (pe)
3899 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3900 gcc_assert (!new_bb);
3902 else
3903 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3906 *initial_address = new_temp;
3908 /* Create: p = (aggr_type *) initial_base */
3909 if (TREE_CODE (new_temp) != SSA_NAME
3910 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3912 vec_stmt = gimple_build_assign (aggr_ptr,
3913 fold_convert (aggr_ptr_type, new_temp));
3914 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3915 /* Copy the points-to information if it exists. */
3916 if (DR_PTR_INFO (dr))
3917 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3918 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3919 if (pe)
3921 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3922 gcc_assert (!new_bb);
3924 else
3925 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3927 else
3928 aggr_ptr_init = new_temp;
3930 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3931 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3932 inner-loop nested in LOOP (during outer-loop vectorization). */
3934 /* No update in loop is required. */
3935 if (only_init && (!loop_vinfo || at_loop == loop))
3936 aptr = aggr_ptr_init;
3937 else
3939 /* The step of the aggregate pointer is the type size. */
3940 tree step = TYPE_SIZE_UNIT (aggr_type);
3941 /* One exception to the above is when the scalar step of the load in
3942 LOOP is zero. In this case the step here is also zero. */
3943 if (*inv_p)
3944 step = size_zero_node;
3945 else if (negative)
3946 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3948 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3950 create_iv (aggr_ptr_init,
3951 fold_convert (aggr_ptr_type, step),
3952 aggr_ptr, loop, &incr_gsi, insert_after,
3953 &indx_before_incr, &indx_after_incr);
3954 incr = gsi_stmt (incr_gsi);
3955 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3957 /* Copy the points-to information if it exists. */
3958 if (DR_PTR_INFO (dr))
3960 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3961 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3963 if (ptr_incr)
3964 *ptr_incr = incr;
3966 aptr = indx_before_incr;
3969 if (!nested_in_vect_loop || only_init)
3970 return aptr;
3973 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3974 nested in LOOP, if exists. */
3976 gcc_assert (nested_in_vect_loop);
3977 if (!only_init)
3979 standard_iv_increment_position (containing_loop, &incr_gsi,
3980 &insert_after);
3981 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3982 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3983 &indx_after_incr);
3984 incr = gsi_stmt (incr_gsi);
3985 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3987 /* Copy the points-to information if it exists. */
3988 if (DR_PTR_INFO (dr))
3990 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3991 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3993 if (ptr_incr)
3994 *ptr_incr = incr;
3996 return indx_before_incr;
3998 else
3999 gcc_unreachable ();
4003 /* Function bump_vector_ptr
4005 Increment a pointer (to a vector type) by vector-size. If requested,
4006 i.e. if PTR-INCR is given, then also connect the new increment stmt
4007 to the existing def-use update-chain of the pointer, by modifying
4008 the PTR_INCR as illustrated below:
4010 The pointer def-use update-chain before this function:
4011 DATAREF_PTR = phi (p_0, p_2)
4012 ....
4013 PTR_INCR: p_2 = DATAREF_PTR + step
4015 The pointer def-use update-chain after this function:
4016 DATAREF_PTR = phi (p_0, p_2)
4017 ....
4018 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4019 ....
4020 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
4022 Input:
4023 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
4024 in the loop.
4025 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
4026 the loop. The increment amount across iterations is expected
4027 to be vector_size.
4028 BSI - location where the new update stmt is to be placed.
4029 STMT - the original scalar memory-access stmt that is being vectorized.
4030 BUMP - optional. The offset by which to bump the pointer. If not given,
4031 the offset is assumed to be vector_size.
4033 Output: Return NEW_DATAREF_PTR as illustrated above.
4037 tree
4038 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
4039 gimple stmt, tree bump)
4041 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4042 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4043 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4044 tree update = TYPE_SIZE_UNIT (vectype);
4045 gimple incr_stmt;
4046 ssa_op_iter iter;
4047 use_operand_p use_p;
4048 tree new_dataref_ptr;
4050 if (bump)
4051 update = bump;
4053 new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
4054 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
4055 dataref_ptr, update);
4056 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
4058 /* Copy the points-to information if it exists. */
4059 if (DR_PTR_INFO (dr))
4061 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
4062 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
4065 if (!ptr_incr)
4066 return new_dataref_ptr;
4068 /* Update the vector-pointer's cross-iteration increment. */
4069 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
4071 tree use = USE_FROM_PTR (use_p);
4073 if (use == dataref_ptr)
4074 SET_USE (use_p, new_dataref_ptr);
4075 else
4076 gcc_assert (tree_int_cst_compare (use, update) == 0);
4079 return new_dataref_ptr;
4083 /* Function vect_create_destination_var.
4085 Create a new temporary of type VECTYPE. */
4087 tree
4088 vect_create_destination_var (tree scalar_dest, tree vectype)
4090 tree vec_dest;
4091 const char *new_name;
4092 tree type;
4093 enum vect_var_kind kind;
4095 kind = vectype ? vect_simple_var : vect_scalar_var;
4096 type = vectype ? vectype : TREE_TYPE (scalar_dest);
4098 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4100 new_name = get_name (scalar_dest);
4101 if (!new_name)
4102 new_name = "var_";
4103 vec_dest = vect_get_new_vect_var (type, kind, new_name);
4105 return vec_dest;
4108 /* Function vect_grouped_store_supported.
4110 Returns TRUE if interleave high and interleave low permutations
4111 are supported, and FALSE otherwise. */
4113 bool
4114 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
4116 enum machine_mode mode = TYPE_MODE (vectype);
4118 /* vect_permute_store_chain requires the group size to be a power of two. */
4119 if (exact_log2 (count) == -1)
4121 if (dump_enabled_p ())
4122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4123 "the size of the group of accesses"
4124 " is not a power of 2");
4125 return false;
4128 /* Check that the permutation is supported. */
4129 if (VECTOR_MODE_P (mode))
4131 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4132 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4133 for (i = 0; i < nelt / 2; i++)
4135 sel[i * 2] = i;
4136 sel[i * 2 + 1] = i + nelt;
4138 if (can_vec_perm_p (mode, false, sel))
4140 for (i = 0; i < nelt; i++)
4141 sel[i] += nelt / 2;
4142 if (can_vec_perm_p (mode, false, sel))
4143 return true;
4147 if (dump_enabled_p ())
4148 dump_printf (MSG_MISSED_OPTIMIZATION,
4149 "interleave op not supported by target.");
4150 return false;
4154 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
4155 type VECTYPE. */
4157 bool
4158 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4160 return vect_lanes_optab_supported_p ("vec_store_lanes",
4161 vec_store_lanes_optab,
4162 vectype, count);
4166 /* Function vect_permute_store_chain.
4168 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4169 a power of 2, generate interleave_high/low stmts to reorder the data
4170 correctly for the stores. Return the final references for stores in
4171 RESULT_CHAIN.
4173 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4174 The input is 4 vectors each containing 8 elements. We assign a number to
4175 each element, the input sequence is:
4177 1st vec: 0 1 2 3 4 5 6 7
4178 2nd vec: 8 9 10 11 12 13 14 15
4179 3rd vec: 16 17 18 19 20 21 22 23
4180 4th vec: 24 25 26 27 28 29 30 31
4182 The output sequence should be:
4184 1st vec: 0 8 16 24 1 9 17 25
4185 2nd vec: 2 10 18 26 3 11 19 27
4186 3rd vec: 4 12 20 28 5 13 21 30
4187 4th vec: 6 14 22 30 7 15 23 31
4189 i.e., we interleave the contents of the four vectors in their order.
4191 We use interleave_high/low instructions to create such output. The input of
4192 each interleave_high/low operation is two vectors:
4193 1st vec 2nd vec
4194 0 1 2 3 4 5 6 7
4195 the even elements of the result vector are obtained left-to-right from the
4196 high/low elements of the first vector. The odd elements of the result are
4197 obtained left-to-right from the high/low elements of the second vector.
4198 The output of interleave_high will be: 0 4 1 5
4199 and of interleave_low: 2 6 3 7
4202 The permutation is done in log LENGTH stages. In each stage interleave_high
4203 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4204 where the first argument is taken from the first half of DR_CHAIN and the
4205 second argument from it's second half.
4206 In our example,
4208 I1: interleave_high (1st vec, 3rd vec)
4209 I2: interleave_low (1st vec, 3rd vec)
4210 I3: interleave_high (2nd vec, 4th vec)
4211 I4: interleave_low (2nd vec, 4th vec)
4213 The output for the first stage is:
4215 I1: 0 16 1 17 2 18 3 19
4216 I2: 4 20 5 21 6 22 7 23
4217 I3: 8 24 9 25 10 26 11 27
4218 I4: 12 28 13 29 14 30 15 31
4220 The output of the second stage, i.e. the final result is:
4222 I1: 0 8 16 24 1 9 17 25
4223 I2: 2 10 18 26 3 11 19 27
4224 I3: 4 12 20 28 5 13 21 30
4225 I4: 6 14 22 30 7 15 23 31. */
4227 void
4228 vect_permute_store_chain (vec<tree> dr_chain,
4229 unsigned int length,
4230 gimple stmt,
4231 gimple_stmt_iterator *gsi,
4232 vec<tree> *result_chain)
4234 tree vect1, vect2, high, low;
4235 gimple perm_stmt;
4236 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4237 tree perm_mask_low, perm_mask_high;
4238 unsigned int i, n;
4239 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
4240 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4242 *result_chain = dr_chain.copy ();
4244 for (i = 0, n = nelt / 2; i < n; i++)
4246 sel[i * 2] = i;
4247 sel[i * 2 + 1] = i + nelt;
4249 perm_mask_high = vect_gen_perm_mask (vectype, sel);
4250 gcc_assert (perm_mask_high != NULL);
4252 for (i = 0; i < nelt; i++)
4253 sel[i] += nelt / 2;
4254 perm_mask_low = vect_gen_perm_mask (vectype, sel);
4255 gcc_assert (perm_mask_low != NULL);
4257 for (i = 0, n = exact_log2 (length); i < n; i++)
4259 for (j = 0; j < length/2; j++)
4261 vect1 = dr_chain[j];
4262 vect2 = dr_chain[j+length/2];
4264 /* Create interleaving stmt:
4265 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
4266 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
4267 perm_stmt
4268 = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
4269 vect1, vect2, perm_mask_high);
4270 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4271 (*result_chain)[2*j] = high;
4273 /* Create interleaving stmt:
4274 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4275 nelt*3/2+1, ...}> */
4276 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
4277 perm_stmt
4278 = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
4279 vect1, vect2, perm_mask_low);
4280 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4281 (*result_chain)[2*j+1] = low;
4283 dr_chain = result_chain->copy ();
4287 /* Function vect_setup_realignment
4289 This function is called when vectorizing an unaligned load using
4290 the dr_explicit_realign[_optimized] scheme.
4291 This function generates the following code at the loop prolog:
4293 p = initial_addr;
4294 x msq_init = *(floor(p)); # prolog load
4295 realignment_token = call target_builtin;
4296 loop:
4297 x msq = phi (msq_init, ---)
4299 The stmts marked with x are generated only for the case of
4300 dr_explicit_realign_optimized.
4302 The code above sets up a new (vector) pointer, pointing to the first
4303 location accessed by STMT, and a "floor-aligned" load using that pointer.
4304 It also generates code to compute the "realignment-token" (if the relevant
4305 target hook was defined), and creates a phi-node at the loop-header bb
4306 whose arguments are the result of the prolog-load (created by this
4307 function) and the result of a load that takes place in the loop (to be
4308 created by the caller to this function).
4310 For the case of dr_explicit_realign_optimized:
4311 The caller to this function uses the phi-result (msq) to create the
4312 realignment code inside the loop, and sets up the missing phi argument,
4313 as follows:
4314 loop:
4315 msq = phi (msq_init, lsq)
4316 lsq = *(floor(p')); # load in loop
4317 result = realign_load (msq, lsq, realignment_token);
4319 For the case of dr_explicit_realign:
4320 loop:
4321 msq = *(floor(p)); # load in loop
4322 p' = p + (VS-1);
4323 lsq = *(floor(p')); # load in loop
4324 result = realign_load (msq, lsq, realignment_token);
4326 Input:
4327 STMT - (scalar) load stmt to be vectorized. This load accesses
4328 a memory location that may be unaligned.
4329 BSI - place where new code is to be inserted.
4330 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4331 is used.
4333 Output:
4334 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4335 target hook, if defined.
4336 Return value - the result of the loop-header phi node. */
4338 tree
4339 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4340 tree *realignment_token,
4341 enum dr_alignment_support alignment_support_scheme,
4342 tree init_addr,
4343 struct loop **at_loop)
4345 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4346 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4347 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4348 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4349 struct loop *loop = NULL;
4350 edge pe = NULL;
4351 tree scalar_dest = gimple_assign_lhs (stmt);
4352 tree vec_dest;
4353 gimple inc;
4354 tree ptr;
4355 tree data_ref;
4356 gimple new_stmt;
4357 basic_block new_bb;
4358 tree msq_init = NULL_TREE;
4359 tree new_temp;
4360 gimple phi_stmt;
4361 tree msq = NULL_TREE;
4362 gimple_seq stmts = NULL;
4363 bool inv_p;
4364 bool compute_in_loop = false;
4365 bool nested_in_vect_loop = false;
4366 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4367 struct loop *loop_for_initial_load = NULL;
4369 if (loop_vinfo)
4371 loop = LOOP_VINFO_LOOP (loop_vinfo);
4372 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4375 gcc_assert (alignment_support_scheme == dr_explicit_realign
4376 || alignment_support_scheme == dr_explicit_realign_optimized);
4378 /* We need to generate three things:
4379 1. the misalignment computation
4380 2. the extra vector load (for the optimized realignment scheme).
4381 3. the phi node for the two vectors from which the realignment is
4382 done (for the optimized realignment scheme). */
4384 /* 1. Determine where to generate the misalignment computation.
4386 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4387 calculation will be generated by this function, outside the loop (in the
4388 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4389 caller, inside the loop.
4391 Background: If the misalignment remains fixed throughout the iterations of
4392 the loop, then both realignment schemes are applicable, and also the
4393 misalignment computation can be done outside LOOP. This is because we are
4394 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4395 are a multiple of VS (the Vector Size), and therefore the misalignment in
4396 different vectorized LOOP iterations is always the same.
4397 The problem arises only if the memory access is in an inner-loop nested
4398 inside LOOP, which is now being vectorized using outer-loop vectorization.
4399 This is the only case when the misalignment of the memory access may not
4400 remain fixed throughout the iterations of the inner-loop (as explained in
4401 detail in vect_supportable_dr_alignment). In this case, not only is the
4402 optimized realignment scheme not applicable, but also the misalignment
4403 computation (and generation of the realignment token that is passed to
4404 REALIGN_LOAD) have to be done inside the loop.
4406 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4407 or not, which in turn determines if the misalignment is computed inside
4408 the inner-loop, or outside LOOP. */
4410 if (init_addr != NULL_TREE || !loop_vinfo)
4412 compute_in_loop = true;
4413 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4417 /* 2. Determine where to generate the extra vector load.
4419 For the optimized realignment scheme, instead of generating two vector
4420 loads in each iteration, we generate a single extra vector load in the
4421 preheader of the loop, and in each iteration reuse the result of the
4422 vector load from the previous iteration. In case the memory access is in
4423 an inner-loop nested inside LOOP, which is now being vectorized using
4424 outer-loop vectorization, we need to determine whether this initial vector
4425 load should be generated at the preheader of the inner-loop, or can be
4426 generated at the preheader of LOOP. If the memory access has no evolution
4427 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4428 to be generated inside LOOP (in the preheader of the inner-loop). */
4430 if (nested_in_vect_loop)
4432 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4433 bool invariant_in_outerloop =
4434 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4435 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4437 else
4438 loop_for_initial_load = loop;
4439 if (at_loop)
4440 *at_loop = loop_for_initial_load;
4442 if (loop_for_initial_load)
4443 pe = loop_preheader_edge (loop_for_initial_load);
4445 /* 3. For the case of the optimized realignment, create the first vector
4446 load at the loop preheader. */
4448 if (alignment_support_scheme == dr_explicit_realign_optimized)
4450 /* Create msq_init = *(floor(p1)) in the loop preheader */
4452 gcc_assert (!compute_in_loop);
4453 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4454 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4455 NULL_TREE, &init_addr, NULL, &inc,
4456 true, &inv_p);
4457 new_temp = copy_ssa_name (ptr, NULL);
4458 new_stmt = gimple_build_assign_with_ops
4459 (BIT_AND_EXPR, new_temp, ptr,
4460 build_int_cst (TREE_TYPE (ptr),
4461 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4462 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4463 gcc_assert (!new_bb);
4464 data_ref
4465 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4466 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4467 new_stmt = gimple_build_assign (vec_dest, data_ref);
4468 new_temp = make_ssa_name (vec_dest, new_stmt);
4469 gimple_assign_set_lhs (new_stmt, new_temp);
4470 if (pe)
4472 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4473 gcc_assert (!new_bb);
4475 else
4476 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4478 msq_init = gimple_assign_lhs (new_stmt);
4481 /* 4. Create realignment token using a target builtin, if available.
4482 It is done either inside the containing loop, or before LOOP (as
4483 determined above). */
4485 if (targetm.vectorize.builtin_mask_for_load)
4487 tree builtin_decl;
4489 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
4490 if (!init_addr)
4492 /* Generate the INIT_ADDR computation outside LOOP. */
4493 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4494 NULL_TREE, loop);
4495 if (loop)
4497 pe = loop_preheader_edge (loop);
4498 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4499 gcc_assert (!new_bb);
4501 else
4502 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4505 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4506 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4507 vec_dest =
4508 vect_create_destination_var (scalar_dest,
4509 gimple_call_return_type (new_stmt));
4510 new_temp = make_ssa_name (vec_dest, new_stmt);
4511 gimple_call_set_lhs (new_stmt, new_temp);
4513 if (compute_in_loop)
4514 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4515 else
4517 /* Generate the misalignment computation outside LOOP. */
4518 pe = loop_preheader_edge (loop);
4519 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4520 gcc_assert (!new_bb);
4523 *realignment_token = gimple_call_lhs (new_stmt);
4525 /* The result of the CALL_EXPR to this builtin is determined from
4526 the value of the parameter and no global variables are touched
4527 which makes the builtin a "const" function. Requiring the
4528 builtin to have the "const" attribute makes it unnecessary
4529 to call mark_call_clobbered. */
4530 gcc_assert (TREE_READONLY (builtin_decl));
4533 if (alignment_support_scheme == dr_explicit_realign)
4534 return msq;
4536 gcc_assert (!compute_in_loop);
4537 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4540 /* 5. Create msq = phi <msq_init, lsq> in loop */
4542 pe = loop_preheader_edge (containing_loop);
4543 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4544 msq = make_ssa_name (vec_dest, NULL);
4545 phi_stmt = create_phi_node (msq, containing_loop->header);
4546 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4548 return msq;
4552 /* Function vect_grouped_load_supported.
4554 Returns TRUE if even and odd permutations are supported,
4555 and FALSE otherwise. */
4557 bool
4558 vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4560 enum machine_mode mode = TYPE_MODE (vectype);
4562 /* vect_permute_load_chain requires the group size to be a power of two. */
4563 if (exact_log2 (count) == -1)
4565 if (dump_enabled_p ())
4566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4567 "the size of the group of accesses"
4568 " is not a power of 2");
4569 return false;
4572 /* Check that the permutation is supported. */
4573 if (VECTOR_MODE_P (mode))
4575 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4576 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4578 for (i = 0; i < nelt; i++)
4579 sel[i] = i * 2;
4580 if (can_vec_perm_p (mode, false, sel))
4582 for (i = 0; i < nelt; i++)
4583 sel[i] = i * 2 + 1;
4584 if (can_vec_perm_p (mode, false, sel))
4585 return true;
4589 if (dump_enabled_p ())
4590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4591 "extract even/odd not supported by target");
4592 return false;
4595 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4596 type VECTYPE. */
4598 bool
4599 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4601 return vect_lanes_optab_supported_p ("vec_load_lanes",
4602 vec_load_lanes_optab,
4603 vectype, count);
4606 /* Function vect_permute_load_chain.
4608 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4609 a power of 2, generate extract_even/odd stmts to reorder the input data
4610 correctly. Return the final references for loads in RESULT_CHAIN.
4612 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4613 The input is 4 vectors each containing 8 elements. We assign a number to each
4614 element, the input sequence is:
4616 1st vec: 0 1 2 3 4 5 6 7
4617 2nd vec: 8 9 10 11 12 13 14 15
4618 3rd vec: 16 17 18 19 20 21 22 23
4619 4th vec: 24 25 26 27 28 29 30 31
4621 The output sequence should be:
4623 1st vec: 0 4 8 12 16 20 24 28
4624 2nd vec: 1 5 9 13 17 21 25 29
4625 3rd vec: 2 6 10 14 18 22 26 30
4626 4th vec: 3 7 11 15 19 23 27 31
4628 i.e., the first output vector should contain the first elements of each
4629 interleaving group, etc.
4631 We use extract_even/odd instructions to create such output. The input of
4632 each extract_even/odd operation is two vectors
4633 1st vec 2nd vec
4634 0 1 2 3 4 5 6 7
4636 and the output is the vector of extracted even/odd elements. The output of
4637 extract_even will be: 0 2 4 6
4638 and of extract_odd: 1 3 5 7
4641 The permutation is done in log LENGTH stages. In each stage extract_even
4642 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4643 their order. In our example,
4645 E1: extract_even (1st vec, 2nd vec)
4646 E2: extract_odd (1st vec, 2nd vec)
4647 E3: extract_even (3rd vec, 4th vec)
4648 E4: extract_odd (3rd vec, 4th vec)
4650 The output for the first stage will be:
4652 E1: 0 2 4 6 8 10 12 14
4653 E2: 1 3 5 7 9 11 13 15
4654 E3: 16 18 20 22 24 26 28 30
4655 E4: 17 19 21 23 25 27 29 31
4657 In order to proceed and create the correct sequence for the next stage (or
4658 for the correct output, if the second stage is the last one, as in our
4659 example), we first put the output of extract_even operation and then the
4660 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4661 The input for the second stage is:
4663 1st vec (E1): 0 2 4 6 8 10 12 14
4664 2nd vec (E3): 16 18 20 22 24 26 28 30
4665 3rd vec (E2): 1 3 5 7 9 11 13 15
4666 4th vec (E4): 17 19 21 23 25 27 29 31
4668 The output of the second stage:
4670 E1: 0 4 8 12 16 20 24 28
4671 E2: 2 6 10 14 18 22 26 30
4672 E3: 1 5 9 13 17 21 25 29
4673 E4: 3 7 11 15 19 23 27 31
4675 And RESULT_CHAIN after reordering:
4677 1st vec (E1): 0 4 8 12 16 20 24 28
4678 2nd vec (E3): 1 5 9 13 17 21 25 29
4679 3rd vec (E2): 2 6 10 14 18 22 26 30
4680 4th vec (E4): 3 7 11 15 19 23 27 31. */
4682 static void
4683 vect_permute_load_chain (vec<tree> dr_chain,
4684 unsigned int length,
4685 gimple stmt,
4686 gimple_stmt_iterator *gsi,
4687 vec<tree> *result_chain)
4689 tree data_ref, first_vect, second_vect;
4690 tree perm_mask_even, perm_mask_odd;
4691 gimple perm_stmt;
4692 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4693 unsigned int i, j, log_length = exact_log2 (length);
4694 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4695 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4697 *result_chain = dr_chain.copy ();
4699 for (i = 0; i < nelt; ++i)
4700 sel[i] = i * 2;
4701 perm_mask_even = vect_gen_perm_mask (vectype, sel);
4702 gcc_assert (perm_mask_even != NULL);
4704 for (i = 0; i < nelt; ++i)
4705 sel[i] = i * 2 + 1;
4706 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4707 gcc_assert (perm_mask_odd != NULL);
4709 for (i = 0; i < log_length; i++)
4711 for (j = 0; j < length; j += 2)
4713 first_vect = dr_chain[j];
4714 second_vect = dr_chain[j+1];
4716 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4717 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
4718 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4719 first_vect, second_vect,
4720 perm_mask_even);
4721 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4722 (*result_chain)[j/2] = data_ref;
4724 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4725 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
4726 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4727 first_vect, second_vect,
4728 perm_mask_odd);
4729 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4730 (*result_chain)[j/2+length/2] = data_ref;
4732 dr_chain = result_chain->copy ();
4737 /* Function vect_transform_grouped_load.
4739 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4740 to perform their permutation and ascribe the result vectorized statements to
4741 the scalar statements.
4744 void
4745 vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
4746 gimple_stmt_iterator *gsi)
4748 vec<tree> result_chain = vNULL;
4750 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4751 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4752 vectors, that are ready for vector computation. */
4753 result_chain.create (size);
4754 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4755 vect_record_grouped_load_vectors (stmt, result_chain);
4756 result_chain.release ();
4759 /* RESULT_CHAIN contains the output of a group of grouped loads that were
4760 generated as part of the vectorization of STMT. Assign the statement
4761 for each vector to the associated scalar statement. */
4763 void
4764 vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
4766 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4767 gimple next_stmt, new_stmt;
4768 unsigned int i, gap_count;
4769 tree tmp_data_ref;
4771 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4772 Since we scan the chain starting from it's first node, their order
4773 corresponds the order of data-refs in RESULT_CHAIN. */
4774 next_stmt = first_stmt;
4775 gap_count = 1;
4776 FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
4778 if (!next_stmt)
4779 break;
4781 /* Skip the gaps. Loads created for the gaps will be removed by dead
4782 code elimination pass later. No need to check for the first stmt in
4783 the group, since it always exists.
4784 GROUP_GAP is the number of steps in elements from the previous
4785 access (if there is no gap GROUP_GAP is 1). We skip loads that
4786 correspond to the gaps. */
4787 if (next_stmt != first_stmt
4788 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4790 gap_count++;
4791 continue;
4794 while (next_stmt)
4796 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4797 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4798 copies, and we put the new vector statement in the first available
4799 RELATED_STMT. */
4800 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4801 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4802 else
4804 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4806 gimple prev_stmt =
4807 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4808 gimple rel_stmt =
4809 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4810 while (rel_stmt)
4812 prev_stmt = rel_stmt;
4813 rel_stmt =
4814 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4817 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4818 new_stmt;
4822 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4823 gap_count = 1;
4824 /* If NEXT_STMT accesses the same DR as the previous statement,
4825 put the same TMP_DATA_REF as its vectorized statement; otherwise
4826 get the next data-ref from RESULT_CHAIN. */
4827 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4828 break;
4833 /* Function vect_force_dr_alignment_p.
4835 Returns whether the alignment of a DECL can be forced to be aligned
4836 on ALIGNMENT bit boundary. */
4838 bool
4839 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4841 if (TREE_CODE (decl) != VAR_DECL)
4842 return false;
4844 /* We cannot change alignment of common or external symbols as another
4845 translation unit may contain a definition with lower alignment.
4846 The rules of common symbol linking mean that the definition
4847 will override the common symbol. */
4848 if (DECL_EXTERNAL (decl)
4849 || DECL_COMMON (decl))
4850 return false;
4852 if (TREE_ASM_WRITTEN (decl))
4853 return false;
4855 /* Do not override the alignment as specified by the ABI when the used
4856 attribute is set. */
4857 if (DECL_PRESERVE_P (decl))
4858 return false;
4860 /* Do not override explicit alignment set by the user when an explicit
4861 section name is also used. This is a common idiom used by many
4862 software projects. */
4863 if (DECL_SECTION_NAME (decl) != NULL_TREE
4864 && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
4865 return false;
4867 if (TREE_STATIC (decl))
4868 return (alignment <= MAX_OFILE_ALIGNMENT);
4869 else
4870 return (alignment <= MAX_STACK_ALIGNMENT);
4874 /* Return whether the data reference DR is supported with respect to its
4875 alignment.
4876 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4877 it is aligned, i.e., check if it is possible to vectorize it with different
4878 alignment. */
4880 enum dr_alignment_support
4881 vect_supportable_dr_alignment (struct data_reference *dr,
4882 bool check_aligned_accesses)
4884 gimple stmt = DR_STMT (dr);
4885 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4886 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4887 enum machine_mode mode = TYPE_MODE (vectype);
4888 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4889 struct loop *vect_loop = NULL;
4890 bool nested_in_vect_loop = false;
4892 if (aligned_access_p (dr) && !check_aligned_accesses)
4893 return dr_aligned;
4895 if (loop_vinfo)
4897 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4898 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4901 /* Possibly unaligned access. */
4903 /* We can choose between using the implicit realignment scheme (generating
4904 a misaligned_move stmt) and the explicit realignment scheme (generating
4905 aligned loads with a REALIGN_LOAD). There are two variants to the
4906 explicit realignment scheme: optimized, and unoptimized.
4907 We can optimize the realignment only if the step between consecutive
4908 vector loads is equal to the vector size. Since the vector memory
4909 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4910 is guaranteed that the misalignment amount remains the same throughout the
4911 execution of the vectorized loop. Therefore, we can create the
4912 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4913 at the loop preheader.
4915 However, in the case of outer-loop vectorization, when vectorizing a
4916 memory access in the inner-loop nested within the LOOP that is now being
4917 vectorized, while it is guaranteed that the misalignment of the
4918 vectorized memory access will remain the same in different outer-loop
4919 iterations, it is *not* guaranteed that is will remain the same throughout
4920 the execution of the inner-loop. This is because the inner-loop advances
4921 with the original scalar step (and not in steps of VS). If the inner-loop
4922 step happens to be a multiple of VS, then the misalignment remains fixed
4923 and we can use the optimized realignment scheme. For example:
4925 for (i=0; i<N; i++)
4926 for (j=0; j<M; j++)
4927 s += a[i+j];
4929 When vectorizing the i-loop in the above example, the step between
4930 consecutive vector loads is 1, and so the misalignment does not remain
4931 fixed across the execution of the inner-loop, and the realignment cannot
4932 be optimized (as illustrated in the following pseudo vectorized loop):
4934 for (i=0; i<N; i+=4)
4935 for (j=0; j<M; j++){
4936 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4937 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4938 // (assuming that we start from an aligned address).
4941 We therefore have to use the unoptimized realignment scheme:
4943 for (i=0; i<N; i+=4)
4944 for (j=k; j<M; j+=4)
4945 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4946 // that the misalignment of the initial address is
4947 // 0).
4949 The loop can then be vectorized as follows:
4951 for (k=0; k<4; k++){
4952 rt = get_realignment_token (&vp[k]);
4953 for (i=0; i<N; i+=4){
4954 v1 = vp[i+k];
4955 for (j=k; j<M; j+=4){
4956 v2 = vp[i+j+VS-1];
4957 va = REALIGN_LOAD <v1,v2,rt>;
4958 vs += va;
4959 v1 = v2;
4962 } */
4964 if (DR_IS_READ (dr))
4966 bool is_packed = false;
4967 tree type = (TREE_TYPE (DR_REF (dr)));
4969 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4970 && (!targetm.vectorize.builtin_mask_for_load
4971 || targetm.vectorize.builtin_mask_for_load ()))
4973 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4974 if ((nested_in_vect_loop
4975 && (TREE_INT_CST_LOW (DR_STEP (dr))
4976 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4977 || !loop_vinfo)
4978 return dr_explicit_realign;
4979 else
4980 return dr_explicit_realign_optimized;
4982 if (!known_alignment_for_access_p (dr))
4983 is_packed = not_size_aligned (DR_REF (dr));
4985 if (targetm.vectorize.
4986 support_vector_misalignment (mode, type,
4987 DR_MISALIGNMENT (dr), is_packed))
4988 /* Can't software pipeline the loads, but can at least do them. */
4989 return dr_unaligned_supported;
4991 else
4993 bool is_packed = false;
4994 tree type = (TREE_TYPE (DR_REF (dr)));
4996 if (!known_alignment_for_access_p (dr))
4997 is_packed = not_size_aligned (DR_REF (dr));
4999 if (targetm.vectorize.
5000 support_vector_misalignment (mode, type,
5001 DR_MISALIGNMENT (dr), is_packed))
5002 return dr_unaligned_supported;
5005 /* Unsupported. */
5006 return dr_unaligned_unsupported;