gcc/tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "dumpfile.h"
27 #include "tm.h"
28 #include "ggc.h"
29 #include "tree.h"
30 #include "tm_p.h"
31 #include "target.h"
32 #include "basic-block.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "dumpfile.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
43 #include "expr.h"
44 #include "optabs.h"
46 /* Return true if load- or store-lanes optab OPTAB is implemented for
47 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
49 static bool
50 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
51 tree vectype, unsigned HOST_WIDE_INT count)
53 enum machine_mode mode, array_mode;
54 bool limit_p;
56 mode = TYPE_MODE (vectype);
57 limit_p = !targetm.array_mode_supported_p (mode, count);
58 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
59 MODE_INT, limit_p);
61 if (array_mode == BLKmode)
63 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
64 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
65 "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
66 GET_MODE_NAME (mode), count);
67 return false;
70 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
72 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
73 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
74 "cannot use %s<%s><%s>", name,
75 GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
76 return false;
79 if (dump_kind_p (MSG_NOTE))
80 dump_printf_loc (MSG_NOTE, vect_location,
81 "can use %s<%s><%s>", name, GET_MODE_NAME (array_mode),
82 GET_MODE_NAME (mode));
84 return true;
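/* A minimal standalone sketch of the size check above (illustrative only,
   not part of the vectorizer; all values are hypothetical): assuming a
   128-bit vector mode and COUNT == 4, the array mode must hold
   4 * 128 == 512 bits, and the lanes optab is unusable if the target has
   no integer mode that wide (the BLKmode case above).  */

static int
example_lanes_supported_p (unsigned long vector_mode_bits, unsigned long count,
                           unsigned long widest_integer_mode_bits)
{
  /* Mirrors count * GET_MODE_BITSIZE (mode) in the function above.  */
  unsigned long array_bits = count * vector_mode_bits;

  /* Stand-in for the BLKmode check: no integer mode wide enough means the
     whole interleaved group cannot be loaded/stored as one array.  */
  return array_bits <= widest_integer_mode_bits;
}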
88 /* Return the smallest scalar part of STMT.
89 This is used to determine the vectype of the stmt. We generally set the
90 vectype according to the type of the result (lhs). For stmts whose
91 result-type is different than the type of the arguments (e.g., demotion,
92 promotion), vectype will be reset appropriately (later). Note that we have
93 to visit the smallest datatype in this function, because that determines the
94 VF. If the smallest datatype in the loop is present only as the rhs of a
95 promotion operation - we'd miss it.
96 Such a case, where a variable of this datatype does not appear in the lhs
97 anywhere in the loop, can only occur if it's an invariant: e.g.:
98 'int_x = (int) short_inv', which we'd expect to have been optimized away by
99 invariant motion. However, we cannot rely on invariant motion to always
100 take invariants out of the loop, and so in the case of promotion we also
101 have to check the rhs.
102 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
103 types. */
105 tree
106 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
107 HOST_WIDE_INT *rhs_size_unit)
109 tree scalar_type = gimple_expr_type (stmt);
110 HOST_WIDE_INT lhs, rhs;
112 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
114 if (is_gimple_assign (stmt)
115 && (gimple_assign_cast_p (stmt)
116 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
117 || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
118 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
120 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
122 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
123 if (rhs < lhs)
124 scalar_type = rhs_type;
127 *lhs_size_unit = lhs;
128 *rhs_size_unit = rhs;
129 return scalar_type;
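/* Source-level illustration of the promotion case described above
   (hypothetical loop, not part of this file): SHORT_P never appears on an
   lhs inside the loop, but it is the smallest scalar type and therefore
   the one that must determine the vectorization factor.  */

static void
example_smallest_type_on_rhs_only (int *int_out, short *short_p, int n)
{
  int i;
  for (i = 0; i < n; i++)
    int_out[i] = (int) short_p[i];   /* rhs is short (2 bytes), lhs is int.  */
}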
133 /* Find the place of the data-ref in STMT in the interleaving chain that starts
134 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
137 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
139 gimple next_stmt = first_stmt;
140 int result = 0;
142 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
143 return -1;
145 while (next_stmt && next_stmt != stmt)
147 result++;
148 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
151 if (next_stmt)
152 return result;
153 else
154 return -1;
158 /* Function vect_insert_into_interleaving_chain.
160 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
162 static void
163 vect_insert_into_interleaving_chain (struct data_reference *dra,
164 struct data_reference *drb)
166 gimple prev, next;
167 tree next_init;
168 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
169 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
171 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
172 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
173 while (next)
175 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
176 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
178 /* Insert here. */
179 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
180 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
181 return;
183 prev = next;
184 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
187 /* We got to the end of the list. Insert here. */
188 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
189 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
193 /* Function vect_update_interleaving_chain.
195 For two data-refs DRA and DRB that are a part of a chain of interleaved data
196 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
198 There are four possible cases:
199 1. New stmts - both DRA and DRB are not a part of any chain:
200 FIRST_DR = DRB
201 NEXT_DR (DRB) = DRA
202 2. DRB is a part of a chain and DRA is not:
203 no need to update FIRST_DR
204 no need to insert DRB
205 insert DRA according to init
206 3. DRA is a part of a chain and DRB is not:
207 if (init of FIRST_DR > init of DRB)
208 FIRST_DR = DRB
209 NEXT(FIRST_DR) = previous FIRST_DR
210 else
211 insert DRB according to its init
212 4. both DRA and DRB are in some interleaving chains:
213 choose the chain with the smallest init of FIRST_DR
214 insert the nodes of the second chain into the first one. */
216 static void
217 vect_update_interleaving_chain (struct data_reference *drb,
218 struct data_reference *dra)
220 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
221 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
222 tree next_init, init_dra_chain, init_drb_chain;
223 gimple first_a, first_b;
224 tree node_init;
225 gimple node, prev, next, first_stmt;
227 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
228 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
230 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
231 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
232 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
233 return;
236 /* 2. DRB is a part of a chain and DRA is not. */
237 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
239 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
240 /* Insert DRA into the chain of DRB. */
241 vect_insert_into_interleaving_chain (dra, drb);
242 return;
245 /* 3. DRA is a part of a chain and DRB is not. */
246 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
248 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
249 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
250 old_first_stmt)));
251 gimple tmp;
253 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
255 /* DRB's init is smaller than the init of the stmt previously marked
256 as the first stmt of the interleaving chain of DRA. Therefore, we
257 update FIRST_STMT and put DRB in the head of the list. */
258 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
259 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
261 /* Update all the stmts in the list to point to the new FIRST_STMT. */
262 tmp = old_first_stmt;
263 while (tmp)
265 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
266 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
269 else
271 /* Insert DRB in the list of DRA. */
272 vect_insert_into_interleaving_chain (drb, dra);
273 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
275 return;
278 /* 4. both DRA and DRB are in some interleaving chains. */
279 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
280 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
281 if (first_a == first_b)
282 return;
283 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
284 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
286 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
288 /* Insert the nodes of the DRA chain into the DRB chain.
289 After inserting a node, continue from this node of the DRB chain (don't
290 start from the beginning). */
291 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
292 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
293 first_stmt = first_b;
295 else
297 /* Insert the nodes of the DRB chain into the DRA chain.
298 After inserting a node, continue from this node of the DRA chain (don't
299 start from the beginning). */
300 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
301 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
302 first_stmt = first_a;
305 while (node)
307 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
308 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
309 while (next)
311 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
312 if (tree_int_cst_compare (next_init, node_init) > 0)
314 /* Insert here. */
315 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
316 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
317 prev = node;
318 break;
320 prev = next;
321 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
323 if (!next)
325 /* We got to the end of the list. Insert here. */
326 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
327 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
328 prev = node;
330 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
331 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
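/* Source-level illustration of an interleaving chain (hypothetical code,
   not part of this file): the three stores below access a[4*i], a[4*i+1]
   and a[4*i+2].  Their DR_INITs are 0, 4 and 8 bytes, so the chain built
   by the functions above is ordered a[4*i] -> a[4*i+1] -> a[4*i+2], with
   the smallest-init access as GROUP_FIRST_ELEMENT.  */

static void
example_interleaving_chain (int *a, int x, int y, int z, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      a[4 * i] = x;       /* DR_INIT == 0  */
      a[4 * i + 1] = y;   /* DR_INIT == 4  */
      a[4 * i + 2] = z;   /* DR_INIT == 8  */
    }
}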
335 /* Check dependence between DRA and DRB for basic block vectorization.
336 If the accesses share the same bases and offsets, we can compare their initial
337 constant offsets to decide whether they differ or not. In case of a read-
338 write dependence we check that the load is before the store to ensure that
339 vectorization will not change the order of the accesses. */
341 static bool
342 vect_drs_dependent_in_basic_block (struct data_reference *dra,
343 struct data_reference *drb)
345 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
346 gimple earlier_stmt;
348 /* We only call this function for pairs of loads and stores, but we verify
349 it here. */
350 if (DR_IS_READ (dra) == DR_IS_READ (drb))
352 if (DR_IS_READ (dra))
353 return false;
354 else
355 return true;
358 /* Check that the data-refs have same bases and offsets. If not, we can't
359 determine if they are dependent. */
360 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
361 || !dr_equal_offsets_p (dra, drb))
362 return true;
364 /* Check the types. */
365 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
366 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
368 if (type_size_a != type_size_b
369 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
370 TREE_TYPE (DR_REF (drb))))
371 return true;
373 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
374 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
376 /* Two different locations - no dependence. */
377 if (init_a != init_b)
378 return false;
380 /* We have a read-write dependence. Check that the load is before the store.
381 When we vectorize basic blocks, vector load can be only before
382 corresponding scalar load, and vector store can be only after its
383 corresponding scalar store. So the order of the accesses is preserved in
384 case the load is before the store. */
385 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
386 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
387 return false;
389 return true;
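/* Basic-block illustration of the read-write check above (hypothetical
   code, not part of this file).  Here the load of a[0] precedes the store
   to a[0], which the function above accepts; swapping the two statements
   would make it a load-after-store and the drs would be reported as
   dependent.  */

static int
example_bb_load_before_store (int *a, int y)
{
  int x = a[0];   /* load first: order is preserved by BB vectorization  */
  a[0] = y;       /* store to the same location afterwards  */
  return x;
}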
393 /* Function vect_check_interleaving.
395 Check if DRA and DRB are a part of interleaving. In case they are, insert
396 DRA and DRB in an interleaving chain. */
398 static bool
399 vect_check_interleaving (struct data_reference *dra,
400 struct data_reference *drb)
402 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
404 /* Check that the data-refs have the same first location (except init) and they
405 are both either store or load (not load and store). */
406 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
407 || !dr_equal_offsets_p (dra, drb)
408 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
409 || DR_IS_READ (dra) != DR_IS_READ (drb))
410 return false;
412 /* Check:
413 1. data-refs are of the same type
414 2. their steps are equal
415 3. the step (if greater than zero) is greater than the difference between
416 data-refs' inits. */
417 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
418 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
420 if (type_size_a != type_size_b
421 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
422 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
423 TREE_TYPE (DR_REF (drb))))
424 return false;
426 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
427 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
428 step = TREE_INT_CST_LOW (DR_STEP (dra));
430 if (init_a > init_b)
432 /* If init_a == init_b + the size of the type * k, we have an interleaving,
433 and DRB is accessed before DRA. */
434 diff_mod_size = (init_a - init_b) % type_size_a;
436 if (step && (init_a - init_b) > step)
437 return false;
439 if (diff_mod_size == 0)
441 vect_update_interleaving_chain (drb, dra);
442 if (dump_kind_p (MSG_NOTE))
444 dump_printf_loc (MSG_NOTE, vect_location,
445 "Detected interleaving ");
446 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
447 dump_printf (MSG_NOTE, " and ");
448 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
450 return true;
453 else
455 /* If init_b == init_a + the size of the type * k, we have an
456 interleaving, and DRA is accessed before DRB. */
457 diff_mod_size = (init_b - init_a) % type_size_a;
459 if (step && (init_b - init_a) > step)
460 return false;
462 if (diff_mod_size == 0)
464 vect_update_interleaving_chain (dra, drb);
465 if (dump_kind_p (MSG_NOTE))
467 dump_printf_loc (MSG_NOTE, vect_location,
468 "Detected interleaving ");
469 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
470 dump_printf (MSG_NOTE, " and ");
471 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
473 return true;
477 return false;
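/* Worked example of the interleaving test above (hypothetical numbers,
   for illustration only): with type_size == 4, init_a == 8, init_b == 0
   and step == 16, we get (8 - 0) % 4 == 0 and 8 <= step, so the two
   accesses interleave within one iteration and DRB is placed before DRA
   in the chain.  */

static int
example_interleaving_test (long init_a, long init_b, long type_size, long step)
{
  long diff = init_a > init_b ? init_a - init_b : init_b - init_a;
  if (step && diff > step)
    return 0;                    /* too far apart to interleave  */
  return diff % type_size == 0;  /* same element grid -> interleaving  */
}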
480 /* Check if the data references pointed to by DR_I and DR_J are the same or
481 belong to the same interleaving group. Return FALSE if the drs are
482 different, otherwise return TRUE. */
484 static bool
485 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
487 gimple stmt_i = DR_STMT (dr_i);
488 gimple stmt_j = DR_STMT (dr_j);
490 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
491 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
492 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
493 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
494 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
495 return true;
496 else
497 return false;
500 /* If address ranges represented by DDR_I and DDR_J are equal,
501 return TRUE, otherwise return FALSE. */
503 static bool
504 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
506 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
507 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
508 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
509 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
510 return true;
511 else
512 return false;
515 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
516 tested at run-time. Return TRUE if DDR was successfully inserted.
517 Return false if versioning is not supported. */
519 static bool
520 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
522 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
524 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
525 return false;
527 if (dump_kind_p (MSG_NOTE))
529 dump_printf_loc (MSG_NOTE, vect_location,
530 "mark for run-time aliasing test between ");
531 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
532 dump_printf (MSG_NOTE, " and ");
533 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
536 if (optimize_loop_nest_for_size_p (loop))
538 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
539 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
540 "versioning not supported when optimizing for size.");
541 return false;
544 /* FORNOW: We don't support versioning with outer-loop vectorization. */
545 if (loop->inner)
547 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
549 "versioning not yet supported for outer-loops.");
550 return false;
553 /* FORNOW: We don't support creating runtime alias tests for non-constant
554 step. */
555 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
556 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
558 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
560 "versioning not yet supported for non-constant "
561 "step");
562 return false;
565 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
566 return true;
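/* Conceptual shape of the run-time alias test that the versioned loop is
   guarded by (hypothetical helper sketch, not the code the vectorizer
   emits): two ranges of N bytes starting at A and B do not overlap iff
   one ends before the other begins.  */

static int
example_ranges_do_not_alias (const char *a, const char *b, unsigned long n)
{
  /* Sketch only; the real check is built from DR_BASE_ADDRESS, DR_INIT
     and the segment length of each data-ref.  */
  return a + n <= b || b + n <= a;
}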
570 /* Function vect_analyze_data_ref_dependence.
572 Return TRUE if there (might) exist a dependence between a memory-reference
573 DRA and a memory-reference DRB. When versioning for alias can check the
574 dependence at run-time, return FALSE. Adjust *MAX_VF according to
575 the data dependence. */
577 static bool
578 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
579 loop_vec_info loop_vinfo, int *max_vf)
581 unsigned int i;
582 struct loop *loop = NULL;
583 struct data_reference *dra = DDR_A (ddr);
584 struct data_reference *drb = DDR_B (ddr);
585 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
586 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
587 lambda_vector dist_v;
588 unsigned int loop_depth;
590 /* Don't bother to analyze statements marked as unvectorizable. */
591 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
592 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
593 return false;
595 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
597 /* Independent data accesses. */
598 vect_check_interleaving (dra, drb);
599 return false;
602 if (loop_vinfo)
603 loop = LOOP_VINFO_LOOP (loop_vinfo);
605 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
606 return false;
608 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
610 gimple earlier_stmt;
612 if (loop_vinfo)
614 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
617 "versioning for alias required: "
618 "can't determine dependence between ");
619 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
620 DR_REF (dra));
621 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
622 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
623 DR_REF (drb));
626 /* Add to list of ddrs that need to be tested at run-time. */
627 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
630 /* When vectorizing a basic block, an unknown dependence can still mean a
631 grouped access. */
632 if (vect_check_interleaving (dra, drb))
633 return false;
635 /* Read-read is OK (we need this check here, after checking for
636 interleaving). */
637 if (DR_IS_READ (dra) && DR_IS_READ (drb))
638 return false;
640 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
643 "can't determine dependence between ");
644 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
645 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
646 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
649 /* We do not vectorize basic blocks with write-write dependencies. */
650 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
651 return true;
653 /* Check that it's not a load-after-store dependence. */
654 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
655 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
656 return true;
658 return false;
661 /* Versioning for alias is not yet supported for basic block SLP, and
662 dependence distance is inapplicable; hence, in case of a known data
663 dependence, basic block vectorization is impossible for now. */
664 if (!loop_vinfo)
666 if (dra != drb && vect_check_interleaving (dra, drb))
667 return false;
669 if (dump_kind_p (MSG_NOTE))
671 dump_printf_loc (MSG_NOTE, vect_location,
672 "determined dependence between ");
673 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
674 dump_printf (MSG_NOTE, " and ");
675 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
678 /* Do not vectorize basic blocks with write-write dependences. */
679 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
680 return true;
682 /* Check if this dependence is allowed in basic block vectorization. */
683 return vect_drs_dependent_in_basic_block (dra, drb);
686 /* Loop-based vectorization and known data dependence. */
687 if (DDR_NUM_DIST_VECTS (ddr) == 0)
689 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
692 "versioning for alias required: "
693 "bad dist vector for ");
694 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
695 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
696 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
698 /* Add to list of ddrs that need to be tested at run-time. */
699 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
702 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
703 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
705 int dist = dist_v[loop_depth];
707 if (dump_kind_p (MSG_NOTE))
708 dump_printf_loc (MSG_NOTE, vect_location,
709 "dependence distance = %d.", dist);
711 if (dist == 0)
713 if (dump_kind_p (MSG_NOTE))
715 dump_printf_loc (MSG_NOTE, vect_location,
716 "dependence distance == 0 between ");
717 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
718 dump_printf (MSG_NOTE, " and ");
719 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
722 /* For interleaving, mark that there is a read-write dependency if
723 necessary. We checked earlier that one of the data-refs is a store. */
724 if (DR_IS_READ (dra))
725 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
726 else
728 if (DR_IS_READ (drb))
729 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
732 continue;
735 if (dist > 0 && DDR_REVERSED_P (ddr))
737 /* If DDR_REVERSED_P is set, the order of the data-refs in DDR was
738 reversed (to make the distance vector positive), and the actual
739 distance is negative. */
740 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
742 "dependence distance negative.");
743 continue;
746 if (abs (dist) >= 2
747 && abs (dist) < *max_vf)
749 /* The dependence distance requires reduction of the maximal
750 vectorization factor. */
751 *max_vf = abs (dist);
752 if (dump_kind_p (MSG_NOTE))
753 dump_printf_loc (MSG_NOTE, vect_location,
754 "adjusting maximal vectorization factor to %i",
755 *max_vf);
758 if (abs (dist) >= *max_vf)
760 /* Dependence distance does not create dependence, as far as
761 vectorization is concerned, in this case. */
762 if (dump_kind_p (MSG_NOTE))
763 dump_printf_loc (MSG_NOTE, vect_location,
764 "dependence distance >= VF.");
765 continue;
768 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
771 "not vectorized, possible dependence "
772 "between data-refs ");
773 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
774 dump_printf (MSG_NOTE, " and ");
775 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
778 return true;
781 return false;
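/* Source-level illustration of the distance-based MAX_VF adjustment above
   (hypothetical loop, not part of this file): the loop carries a dependence
   of distance 3 between the write to a[i+3] and the read of a[i], so the
   code above clamps *max_vf to 3; with a vectorization factor of at most 3
   each read still happens before the write to the same location.  */

static void
example_dependence_distance_three (int *a, int n)
{
  int i;
  for (i = 0; i < n - 3; i++)
    a[i + 3] = a[i] + 1;   /* distance vector (3)  */
}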
784 /* Function vect_analyze_data_ref_dependences.
786 Examine all the data references in the loop, and make sure there do not
787 exist any data dependences between them. Set *MAX_VF according to
788 the maximum vectorization factor the data dependences allow. */
790 bool
791 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
792 bb_vec_info bb_vinfo, int *max_vf)
794 unsigned int i;
795 VEC (ddr_p, heap) *ddrs = NULL;
796 struct data_dependence_relation *ddr;
798 if (dump_kind_p (MSG_NOTE))
799 dump_printf_loc (MSG_NOTE, vect_location,
800 "=== vect_analyze_dependences ===");
801 if (loop_vinfo)
802 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
803 else
804 ddrs = BB_VINFO_DDRS (bb_vinfo);
806 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
807 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
808 return false;
810 return true;
814 /* Function vect_compute_data_ref_alignment
816 Compute the misalignment of the data reference DR.
818 Output:
819 1. If during the misalignment computation it is found that the data reference
820 cannot be vectorized then false is returned.
821 2. DR_MISALIGNMENT (DR) is defined.
823 FOR NOW: No analysis is actually performed. Misalignment is calculated
824 only for trivial cases. TODO. */
826 static bool
827 vect_compute_data_ref_alignment (struct data_reference *dr)
829 gimple stmt = DR_STMT (dr);
830 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
831 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
832 struct loop *loop = NULL;
833 tree ref = DR_REF (dr);
834 tree vectype;
835 tree base, base_addr;
836 bool base_aligned;
837 tree misalign;
838 tree aligned_to, alignment;
840 if (dump_kind_p (MSG_NOTE))
841 dump_printf_loc (MSG_NOTE, vect_location,
842 "vect_compute_data_ref_alignment:");
844 if (loop_vinfo)
845 loop = LOOP_VINFO_LOOP (loop_vinfo);
847 /* Initialize misalignment to unknown. */
848 SET_DR_MISALIGNMENT (dr, -1);
850 /* Strided loads perform only component accesses, misalignment information
851 is irrelevant for them. */
852 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
853 return true;
855 misalign = DR_INIT (dr);
856 aligned_to = DR_ALIGNED_TO (dr);
857 base_addr = DR_BASE_ADDRESS (dr);
858 vectype = STMT_VINFO_VECTYPE (stmt_info);
860 /* In case the dataref is in an inner-loop of the loop that is being
861 vectorized (LOOP), we use the base and misalignment information
862 relative to the outer-loop (LOOP). This is ok only if the misalignment
863 stays the same throughout the execution of the inner-loop, which is why
864 we have to check that the stride of the dataref in the inner-loop is a
865 multiple of the vector size. */
866 if (loop && nested_in_vect_loop_p (loop, stmt))
868 tree step = DR_STEP (dr);
869 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
871 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
873 if (dump_kind_p (MSG_NOTE))
874 dump_printf_loc (MSG_NOTE, vect_location,
875 "inner step divides the vector-size.");
876 misalign = STMT_VINFO_DR_INIT (stmt_info);
877 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
878 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
880 else
882 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
884 "inner step doesn't divide the vector-size.");
885 misalign = NULL_TREE;
889 /* Similarly, if we're doing basic-block vectorization, we can only use
890 base and misalignment information relative to an innermost loop if the
891 misalignment stays the same throughout the execution of the loop.
892 As above, this is the case if the stride of the dataref is a multiple
893 of the vector size. */
894 if (!loop)
896 tree step = DR_STEP (dr);
897 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
899 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
901 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
903 "SLP: step doesn't divide the vector-size.");
904 misalign = NULL_TREE;
908 base = build_fold_indirect_ref (base_addr);
909 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
911 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
912 || !misalign)
914 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
917 "Unknown alignment for access: ");
918 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
920 return true;
923 if ((DECL_P (base)
924 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
925 alignment) >= 0)
926 || (TREE_CODE (base_addr) == SSA_NAME
927 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
928 TREE_TYPE (base_addr)))),
929 alignment) >= 0)
930 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
931 base_aligned = true;
932 else
933 base_aligned = false;
935 if (!base_aligned)
937 /* Do not change the alignment of global variables here if
938 flag_section_anchors is enabled as we already generated
939 RTL for other functions. Most global variables should
940 have been aligned during the IPA increase_alignment pass. */
941 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
942 || (TREE_STATIC (base) && flag_section_anchors))
944 if (dump_kind_p (MSG_NOTE))
946 dump_printf_loc (MSG_NOTE, vect_location,
947 "can't force alignment of ref: ");
948 dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
950 return true;
953 /* Force the alignment of the decl.
954 NOTE: This is the only change to the code we make during
955 the analysis phase, before deciding to vectorize the loop. */
956 if (dump_kind_p (MSG_NOTE))
958 dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
959 dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
962 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
963 DECL_USER_ALIGN (base) = 1;
966 /* At this point we assume that the base is aligned. */
967 gcc_assert (base_aligned
968 || (TREE_CODE (base) == VAR_DECL
969 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
971 /* If this is a backward running DR then the first access in the larger
972 vectype actually is N-1 elements before the address in the DR.
973 Adjust misalign accordingly. */
974 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
976 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
977 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
978 otherwise we wouldn't be here. */
979 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
980 /* PLUS because DR_STEP was negative. */
981 misalign = size_binop (PLUS_EXPR, misalign, offset);
984 /* Modulo alignment. */
985 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
987 if (!host_integerp (misalign, 1))
989 /* Negative or overflowed misalignment value. */
990 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
991 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
992 "unexpected misalign value");
993 return false;
996 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
998 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1001 "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1002 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
1005 return true;
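/* Worked example of the misalignment computation above (hypothetical
   numbers, for illustration only): for a 16-byte vector type, a base known
   to be sufficiently aligned and DR_INIT == 20 bytes, the final "modulo
   alignment" step gives DR_MISALIGNMENT == 20 % 16 == 4.  */

static long
example_misalignment (long dr_init, long vector_align_bytes)
{
  /* Mirrors the FLOOR_MOD_EXPR step above, assuming a non-negative
     DR_INIT; the real code uses a floor modulo on trees.  */
  return dr_init % vector_align_bytes;
}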
1009 /* Function vect_compute_data_refs_alignment
1011 Compute the misalignment of data references in the loop.
1012 Return FALSE if a data reference is found that cannot be vectorized. */
1014 static bool
1015 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
1016 bb_vec_info bb_vinfo)
1018 VEC (data_reference_p, heap) *datarefs;
1019 struct data_reference *dr;
1020 unsigned int i;
1022 if (loop_vinfo)
1023 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1024 else
1025 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1027 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1028 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
1029 && !vect_compute_data_ref_alignment (dr))
1031 if (bb_vinfo)
1033 /* Mark unsupported statement as unvectorizable. */
1034 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1035 continue;
1037 else
1038 return false;
1041 return true;
1045 /* Function vect_update_misalignment_for_peel
1047 DR - the data reference whose misalignment is to be adjusted.
1048 DR_PEEL - the data reference whose misalignment is being made
1049 zero in the vector loop by the peel.
1050 NPEEL - the number of iterations in the peel loop if the misalignment
1051 of DR_PEEL is known at compile time. */
1053 static void
1054 vect_update_misalignment_for_peel (struct data_reference *dr,
1055 struct data_reference *dr_peel, int npeel)
1057 unsigned int i;
1058 VEC(dr_p,heap) *same_align_drs;
1059 struct data_reference *current_dr;
1060 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1061 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1062 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1063 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1065 /* For interleaved data accesses the step in the loop must be multiplied by
1066 the size of the interleaving group. */
1067 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1068 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1069 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
1070 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1072 /* It can be assumed that the data refs with the same alignment as dr_peel
1073 are aligned in the vector loop. */
1074 same_align_drs
1075 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1076 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1078 if (current_dr != dr)
1079 continue;
1080 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1081 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1082 SET_DR_MISALIGNMENT (dr, 0);
1083 return;
1086 if (known_alignment_for_access_p (dr)
1087 && known_alignment_for_access_p (dr_peel))
1089 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1090 int misal = DR_MISALIGNMENT (dr);
1091 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1092 misal += negative ? -npeel * dr_size : npeel * dr_size;
1093 misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
1094 SET_DR_MISALIGNMENT (dr, misal);
1095 return;
1098 if (dump_kind_p (MSG_NOTE))
1099 dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.");
1100 SET_DR_MISALIGNMENT (dr, -1);
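/* Worked example of the misalignment update above (hypothetical numbers,
   for illustration only): with dr_size == 4, a known misalignment of 4
   bytes, a forward step (negative == 0), npeel == 3 and a 16-byte vector
   alignment, the new misalignment is (4 + 3 * 4) & 15 == 0.  */

static int
example_update_misalignment (int misal, int npeel, int dr_size,
                             int vector_align_bytes, int negative)
{
  misal += negative ? -npeel * dr_size : npeel * dr_size;
  return misal & (vector_align_bytes - 1);   /* alignment is a power of two  */
}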
1104 /* Function vect_verify_datarefs_alignment
1106 Return TRUE if all data references in the loop can be
1107 handled with respect to alignment. */
1109 bool
1110 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1112 VEC (data_reference_p, heap) *datarefs;
1113 struct data_reference *dr;
1114 enum dr_alignment_support supportable_dr_alignment;
1115 unsigned int i;
1117 if (loop_vinfo)
1118 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1119 else
1120 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1122 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1124 gimple stmt = DR_STMT (dr);
1125 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1127 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1128 continue;
1130 /* For interleaving, only the alignment of the first access matters.
1131 Skip statements marked as not vectorizable. */
1132 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
1133 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1134 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1135 continue;
1137 /* Strided loads perform only component accesses, alignment is
1138 irrelevant for them. */
1139 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1140 continue;
1142 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1143 if (!supportable_dr_alignment)
1145 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1147 if (DR_IS_READ (dr))
1148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1149 "not vectorized: unsupported unaligned load.");
1150 else
1151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1152 "not vectorized: unsupported unaligned "
1153 "store.");
1155 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1156 DR_REF (dr));
1158 return false;
1160 if (supportable_dr_alignment != dr_aligned
1161 && dump_kind_p (MSG_NOTE))
1162 dump_printf_loc (MSG_NOTE, vect_location,
1163 "Vectorizing an unaligned access.");
1165 return true;
1168 /* Given a memory reference EXP, return whether its alignment is less
1169 than its size. */
1171 static bool
1172 not_size_aligned (tree exp)
1174 if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp)), 1))
1175 return true;
1177 return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
1178 > get_object_alignment (exp));
1181 /* Function vector_alignment_reachable_p
1183 Return true if vector alignment for DR is reachable by peeling
1184 a few loop iterations. Return false otherwise. */
1186 static bool
1187 vector_alignment_reachable_p (struct data_reference *dr)
1189 gimple stmt = DR_STMT (dr);
1190 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1191 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1193 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1195 /* For interleaved access we peel only if the number of iterations in
1196 the prolog loop (VF - misalignment) is a multiple of the
1197 number of interleaved accesses. */
1198 int elem_size, mis_in_elements;
1199 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1201 /* FORNOW: handle only known alignment. */
1202 if (!known_alignment_for_access_p (dr))
1203 return false;
1205 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1206 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1208 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1209 return false;
1212 /* If the misalignment is known at compile time then allow peeling
1213 only if natural alignment is reachable through peeling. */
1214 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1216 HOST_WIDE_INT elmsize =
1217 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1218 if (dump_kind_p (MSG_NOTE))
1220 dump_printf_loc (MSG_NOTE, vect_location,
1221 "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1222 dump_printf (MSG_NOTE,
1223 ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1225 if (DR_MISALIGNMENT (dr) % elmsize)
1227 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1229 "data size does not divide the misalignment.\n");
1230 return false;
1234 if (!known_alignment_for_access_p (dr))
1236 tree type = TREE_TYPE (DR_REF (dr));
1237 bool is_packed = not_size_aligned (DR_REF (dr));
1238 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1239 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1240 "Unknown misalignment, is_packed = %d",is_packed);
1241 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1242 return true;
1243 else
1244 return false;
1247 return true;
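/* Worked example of the grouped-access condition above (hypothetical
   numbers, for illustration only): for a 4-element vector type with
   4-byte elements, DR_MISALIGNMENT == 8 gives mis_in_elements == 2, so
   the prologue would peel nelements - mis_in_elements == 2 scalar
   iterations; with GROUP_SIZE == 2 that is a whole number of groups and
   peeling is allowed, while GROUP_SIZE == 3 would reject it.  */

static int
example_group_peel_ok (int nelements, int mis_in_elements, int group_size)
{
  return (nelements - mis_in_elements) % group_size == 0;
}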
1251 /* Calculate the cost of the memory access represented by DR. */
1253 static void
1254 vect_get_data_access_cost (struct data_reference *dr,
1255 unsigned int *inside_cost,
1256 unsigned int *outside_cost,
1257 stmt_vector_for_cost *body_cost_vec)
1259 gimple stmt = DR_STMT (dr);
1260 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1261 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1262 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1263 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1264 int ncopies = vf / nunits;
1266 if (DR_IS_READ (dr))
1267 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1268 NULL, body_cost_vec, false);
1269 else
1270 vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
1272 if (dump_kind_p (MSG_NOTE))
1273 dump_printf_loc (MSG_NOTE, vect_location,
1274 "vect_get_data_access_cost: inside_cost = %d, "
1275 "outside_cost = %d.", *inside_cost, *outside_cost);
1279 static hashval_t
1280 vect_peeling_hash (const void *elem)
1282 const struct _vect_peel_info *peel_info;
1284 peel_info = (const struct _vect_peel_info *) elem;
1285 return (hashval_t) peel_info->npeel;
1289 static int
1290 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1292 const struct _vect_peel_info *a, *b;
1294 a = (const struct _vect_peel_info *) elem1;
1295 b = (const struct _vect_peel_info *) elem2;
1296 return (a->npeel == b->npeel);
1300 /* Insert DR into peeling hash table with NPEEL as key. */
1302 static void
1303 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1304 int npeel)
1306 struct _vect_peel_info elem, *slot;
1307 void **new_slot;
1308 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1310 elem.npeel = npeel;
1311 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1312 &elem);
1313 if (slot)
1314 slot->count++;
1315 else
1317 slot = XNEW (struct _vect_peel_info);
1318 slot->npeel = npeel;
1319 slot->dr = dr;
1320 slot->count = 1;
1321 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1322 INSERT);
1323 *new_slot = slot;
1326 if (!supportable_dr_alignment && !flag_vect_cost_model)
1327 slot->count += VECT_MAX_COST;
1331 /* Traverse the peeling hash table to find the peeling option that aligns the
1332 maximum number of data accesses. */
1334 static int
1335 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1337 vect_peel_info elem = (vect_peel_info) *slot;
1338 vect_peel_extended_info max = (vect_peel_extended_info) data;
1340 if (elem->count > max->peel_info.count
1341 || (elem->count == max->peel_info.count
1342 && max->peel_info.npeel > elem->npeel))
1344 max->peel_info.npeel = elem->npeel;
1345 max->peel_info.count = elem->count;
1346 max->peel_info.dr = elem->dr;
1349 return 1;
1353 /* Traverse the peeling hash table and calculate the cost for each peeling
1354 option. Find the one with the lowest cost. */
1356 static int
1357 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1359 vect_peel_info elem = (vect_peel_info) *slot;
1360 vect_peel_extended_info min = (vect_peel_extended_info) data;
1361 int save_misalignment, dummy;
1362 unsigned int inside_cost = 0, outside_cost = 0, i;
1363 gimple stmt = DR_STMT (elem->dr);
1364 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1365 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1366 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1367 struct data_reference *dr;
1368 stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
1369 int single_iter_cost;
1371 prologue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1372 body_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1373 epilogue_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
1375 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1377 stmt = DR_STMT (dr);
1378 stmt_info = vinfo_for_stmt (stmt);
1379 /* For interleaving, only the alignment of the first access
1380 matters. */
1381 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1382 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1383 continue;
1385 save_misalignment = DR_MISALIGNMENT (dr);
1386 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1387 vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
1388 &body_cost_vec);
1389 SET_DR_MISALIGNMENT (dr, save_misalignment);
1392 single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
1393 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
1394 &dummy, single_iter_cost,
1395 &prologue_cost_vec,
1396 &epilogue_cost_vec);
1398 /* Prologue and epilogue costs are added to the target model later.
1399 These costs depend only on the scalar iteration cost, the
1400 number of peeling iterations finally chosen, and the number of
1401 misaligned statements. So discard the information found here. */
1402 VEC_free (stmt_info_for_cost, heap, prologue_cost_vec);
1403 VEC_free (stmt_info_for_cost, heap, epilogue_cost_vec);
1405 if (inside_cost < min->inside_cost
1406 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1408 min->inside_cost = inside_cost;
1409 min->outside_cost = outside_cost;
1410 VEC_free (stmt_info_for_cost, heap, min->body_cost_vec);
1411 min->body_cost_vec = body_cost_vec;
1412 min->peel_info.dr = elem->dr;
1413 min->peel_info.npeel = elem->npeel;
1415 else
1416 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1418 return 1;
1422 /* Choose best peeling option by traversing peeling hash table and either
1423 choosing an option with the lowest cost (if cost model is enabled) or the
1424 option that aligns as many accesses as possible. */
1426 static struct data_reference *
1427 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1428 unsigned int *npeel,
1429 stmt_vector_for_cost *body_cost_vec)
1431 struct _vect_peel_extended_info res;
1433 res.peel_info.dr = NULL;
1434 res.body_cost_vec = NULL;
1436 if (flag_vect_cost_model)
1438 res.inside_cost = INT_MAX;
1439 res.outside_cost = INT_MAX;
1440 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1441 vect_peeling_hash_get_lowest_cost, &res);
1443 else
1445 res.peel_info.count = 0;
1446 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1447 vect_peeling_hash_get_most_frequent, &res);
1450 *npeel = res.peel_info.npeel;
1451 *body_cost_vec = res.body_cost_vec;
1452 return res.peel_info.dr;
1456 /* Function vect_enhance_data_refs_alignment
1458 This pass will use loop versioning and loop peeling in order to enhance
1459 the alignment of data references in the loop.
1461 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1462 original loop is to be vectorized. Any other loops that are created by
1463 the transformations performed in this pass are not supposed to be
1464 vectorized. This restriction will be relaxed.
1466 This pass will require a cost model to guide it whether to apply peeling
1467 or versioning or a combination of the two. For example, the scheme that
1468 intel uses when given a loop with several memory accesses, is as follows:
1469 choose one memory access ('p') which alignment you want to force by doing
1470 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1471 other accesses are not necessarily aligned, or (2) use loop versioning to
1472 generate one loop in which all accesses are aligned, and another loop in
1473 which only 'p' is necessarily aligned.
1475 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1476 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1477 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1479 Devising a cost model is the most critical aspect of this work. It will
1480 guide us on which access to peel for, whether to use loop versioning, how
1481 many versions to create, etc. The cost model will probably consist of
1482 generic considerations as well as target specific considerations (on
1483 powerpc for example, misaligned stores are more painful than misaligned
1484 loads).
1486 Here are the general steps involved in alignment enhancements:
1488 -- original loop, before alignment analysis:
1489 for (i=0; i<N; i++){
1490 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1491 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1494 -- After vect_compute_data_refs_alignment:
1495 for (i=0; i<N; i++){
1496 x = q[i]; # DR_MISALIGNMENT(q) = 3
1497 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1500 -- Possibility 1: we do loop versioning:
1501 if (p is aligned) {
1502 for (i=0; i<N; i++){ # loop 1A
1503 x = q[i]; # DR_MISALIGNMENT(q) = 3
1504 p[i] = y; # DR_MISALIGNMENT(p) = 0
1507 else {
1508 for (i=0; i<N; i++){ # loop 1B
1509 x = q[i]; # DR_MISALIGNMENT(q) = 3
1510 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1514 -- Possibility 2: we do loop peeling:
1515 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1516 x = q[i];
1517 p[i] = y;
1519 for (i = 3; i < N; i++){ # loop 2A
1520 x = q[i]; # DR_MISALIGNMENT(q) = 0
1521 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1524 -- Possibility 3: combination of loop peeling and versioning:
1525 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1526 x = q[i];
1527 p[i] = y;
1529 if (p is aligned) {
1530 for (i = 3; i<N; i++){ # loop 3A
1531 x = q[i]; # DR_MISALIGNMENT(q) = 0
1532 p[i] = y; # DR_MISALIGNMENT(p) = 0
1535 else {
1536 for (i = 3; i<N; i++){ # loop 3B
1537 x = q[i]; # DR_MISALIGNMENT(q) = 0
1538 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1542 These loops are later passed to loop_transform to be vectorized. The
1543 vectorizer will use the alignment information to guide the transformation
1544 (whether to generate regular loads/stores, or with special handling for
1545 misalignment). */
1547 bool
1548 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1550 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1551 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1552 enum dr_alignment_support supportable_dr_alignment;
1553 struct data_reference *dr0 = NULL, *first_store = NULL;
1554 struct data_reference *dr;
1555 unsigned int i, j;
1556 bool do_peeling = false;
1557 bool do_versioning = false;
1558 bool stat;
1559 gimple stmt;
1560 stmt_vec_info stmt_info;
1561 int vect_versioning_for_alias_required;
1562 unsigned int npeel = 0;
1563 bool all_misalignments_unknown = true;
1564 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1565 unsigned possible_npeel_number = 1;
1566 tree vectype;
1567 unsigned int nelements, mis, same_align_drs_max = 0;
1568 stmt_vector_for_cost body_cost_vec = NULL;
1570 if (dump_kind_p (MSG_NOTE))
1571 dump_printf_loc (MSG_NOTE, vect_location,
1572 "=== vect_enhance_data_refs_alignment ===");
1574 /* While cost model enhancements are expected in the future, the high level
1575 view of the code at this time is as follows:
1577 A) If there is a misaligned access then see if peeling to align
1578 this access can make all data references satisfy
1579 vect_supportable_dr_alignment. If so, update data structures
1580 as needed and return true.
1582 B) If peeling wasn't possible and there is a data reference with an
1583 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1584 then see if loop versioning checks can be used to make all data
1585 references satisfy vect_supportable_dr_alignment. If so, update
1586 data structures as needed and return true.
1588 C) If neither peeling nor versioning were successful then return false if
1589 any data reference does not satisfy vect_supportable_dr_alignment.
1591 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1593 Note, Possibility 3 above (which is peeling and versioning together) is not
1594 being done at this time. */
1596 /* (1) Peeling to force alignment. */
1598 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1599 Considerations:
1600 + How many accesses will become aligned due to the peeling
1601 - How many accesses will become unaligned due to the peeling,
1602 and the cost of misaligned accesses.
1603 - The cost of peeling (the extra runtime checks, the increase
1604 in code size). */
1606 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1608 stmt = DR_STMT (dr);
1609 stmt_info = vinfo_for_stmt (stmt);
1611 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1612 continue;
1614 /* For interleaving, only the alignment of the first access
1615 matters. */
1616 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1617 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1618 continue;
1620 /* FORNOW: Any strided load prevents peeling. The induction
1621 variable analysis will fail when the prologue loop is generated,
1622 and so we can't generate the new base for the pointer. */
1623 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1625 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1627 "strided load prevents peeling");
1628 do_peeling = false;
1629 break;
1632 /* For invariant accesses there is nothing to enhance. */
1633 if (integer_zerop (DR_STEP (dr)))
1634 continue;
1636 /* Strided loads perform only component accesses, alignment is
1637 irrelevant for them. */
1638 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1639 continue;
1641 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1642 do_peeling = vector_alignment_reachable_p (dr);
1643 if (do_peeling)
1645 if (known_alignment_for_access_p (dr))
1647 unsigned int npeel_tmp;
1648 bool negative = tree_int_cst_compare (DR_STEP (dr),
1649 size_zero_node) < 0;
1651 /* Save info about DR in the hash table. */
1652 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1653 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1654 htab_create (1, vect_peeling_hash,
1655 vect_peeling_hash_eq, free);
1657 vectype = STMT_VINFO_VECTYPE (stmt_info);
1658 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1659 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1660 TREE_TYPE (DR_REF (dr))));
1661 npeel_tmp = (negative
1662 ? (mis - nelements) : (nelements - mis))
1663 & (nelements - 1);
1665 /* For multiple types, it is possible that the bigger type access
1666 will have more than one peeling option. E.g., a loop with two
1667 types: one of size (vector size / 4), and the other one of
1668 size (vector size / 8). The vectorization factor will be 8. If both
1669 accesses are misaligned by 3, the first one needs one scalar
1670 iteration to be aligned, and the second one needs 5. But
1671 the first one will also be aligned by peeling 5 scalar
1672 iterations, and in that case both accesses will be aligned.
1673 Hence, besides the immediate peeling amount, we also want
1674 to try adding a full vector size, as long as we don't exceed the
1675 vectorization factor.
1676 We do this automatically when using the cost model, since we
1677 calculate the cost for every peeling option. */
1678 if (!flag_vect_cost_model)
1679 possible_npeel_number = vf /nelements;
1681 /* Handle the aligned case. We may decide to align some other
1682 access, making DR unaligned. */
1683 if (DR_MISALIGNMENT (dr) == 0)
1685 npeel_tmp = 0;
1686 if (!flag_vect_cost_model)
1687 possible_npeel_number++;
1690 for (j = 0; j < possible_npeel_number; j++)
1692 gcc_assert (npeel_tmp <= vf);
1693 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1694 npeel_tmp += nelements;
1697 all_misalignments_unknown = false;
1698 /* The data-ref that was chosen for the case that all the
1699 misalignments are unknown is not relevant anymore, since we
1700 have a data-ref with known alignment. */
1701 dr0 = NULL;
1703 else
1705 /* If we don't know all the misalignment values, we prefer
1706 peeling for the data-ref that has the maximum number of data-refs
1707 with the same alignment, unless the target prefers to align
1708 stores over loads. */
1709 if (all_misalignments_unknown)
1711 if (same_align_drs_max < VEC_length (dr_p,
1712 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1713 || !dr0)
1715 same_align_drs_max = VEC_length (dr_p,
1716 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1717 dr0 = dr;
1720 if (!first_store && DR_IS_WRITE (dr))
1721 first_store = dr;
1724 /* If there are both known and unknown misaligned accesses in the
1725 loop, we choose the peeling amount according to the known
1726 accesses. */
1729 if (!supportable_dr_alignment)
1731 dr0 = dr;
1732 if (!first_store && DR_IS_WRITE (dr))
1733 first_store = dr;
1737 else
1739 if (!aligned_access_p (dr))
1741 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
1742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1743 "vector alignment may not be reachable");
1744 break;
1749 vect_versioning_for_alias_required
1750 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1752 /* Temporarily, if versioning for alias is required, we disable peeling
1753 until we support peeling and versioning. Often peeling for alignment
1754 will require peeling for loop-bound, which in turn requires that we
1755 know how to adjust the loop ivs after the loop. */
1756 if (vect_versioning_for_alias_required
1757 || !vect_can_advance_ivs_p (loop_vinfo)
1758 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1759 do_peeling = false;
1761 if (do_peeling && all_misalignments_unknown
1762 && vect_supportable_dr_alignment (dr0, false))
1765 /* Check if the target prefers to align stores over loads, i.e., if
1766 misaligned stores are more expensive than misaligned loads (taking
1767 drs with the same alignment into account). */
1768 if (first_store && DR_IS_READ (dr0))
1770 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1771 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1772 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1773 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1774 stmt_vector_for_cost dummy = VEC_alloc (stmt_info_for_cost, heap, 2);
1776 vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1777 &dummy);
1778 vect_get_data_access_cost (first_store, &store_inside_cost,
1779 &store_outside_cost, &dummy);
1781 VEC_free (stmt_info_for_cost, heap, dummy);
1783 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1784 aligning the load DR0). */
1785 load_inside_penalty = store_inside_cost;
1786 load_outside_penalty = store_outside_cost;
1787 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1788 (vinfo_for_stmt (DR_STMT (first_store))),
1789 i, dr);
1790 i++)
1791 if (DR_IS_READ (dr))
1793 load_inside_penalty += load_inside_cost;
1794 load_outside_penalty += load_outside_cost;
1796 else
1798 load_inside_penalty += store_inside_cost;
1799 load_outside_penalty += store_outside_cost;
1802 /* Calculate the penalty for leaving DR0 unaligned (by
1803 aligning the FIRST_STORE). */
1804 store_inside_penalty = load_inside_cost;
1805 store_outside_penalty = load_outside_cost;
1806 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1807 (vinfo_for_stmt (DR_STMT (dr0))),
1808 i, dr);
1809 i++)
1810 if (DR_IS_READ (dr))
1812 store_inside_penalty += load_inside_cost;
1813 store_outside_penalty += load_outside_cost;
1815 else
1817 store_inside_penalty += store_inside_cost;
1818 store_outside_penalty += store_outside_cost;
1821 if (load_inside_penalty > store_inside_penalty
1822 || (load_inside_penalty == store_inside_penalty
1823 && load_outside_penalty > store_outside_penalty))
1824 dr0 = first_store;
1827 /* In case there are only loads with different unknown misalignments, use
1828 peeling only if it may help to align other accesses in the loop. */
1829 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1830 (vinfo_for_stmt (DR_STMT (dr0))))
1831 && vect_supportable_dr_alignment (dr0, false)
1832 != dr_unaligned_supported)
1833 do_peeling = false;
1836 if (do_peeling && !dr0)
1838 /* Peeling is possible, but there is no data access that is supported
1839 only when aligned. So we try to choose the best possible peeling amount. */
1841 /* We should get here only if there are drs with known misalignment. */
1842 gcc_assert (!all_misalignments_unknown);
1844 /* Choose the best peeling from the hash table. */
1845 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
1846 &body_cost_vec);
1847 if (!dr0 || !npeel)
1848 do_peeling = false;
1851 if (do_peeling)
1853 stmt = DR_STMT (dr0);
1854 stmt_info = vinfo_for_stmt (stmt);
1855 vectype = STMT_VINFO_VECTYPE (stmt_info);
1856 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1858 if (known_alignment_for_access_p (dr0))
1860 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1861 size_zero_node) < 0;
1862 if (!npeel)
1864 /* Since it's known at compile time, compute the number of
1865 iterations in the peeled loop (the peeling factor) for use in
1866 updating DR_MISALIGNMENT values. The peeling factor is the
1867 vectorization factor minus the misalignment as an element
1868 count. */
1869 mis = DR_MISALIGNMENT (dr0);
1870 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1871 npeel = ((negative ? mis - nelements : nelements - mis)
1872 & (nelements - 1));
1875 /* For interleaved data access every iteration accesses all the
1876 members of the group, therefore we divide the number of iterations
1877 by the group size. */
1878 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1879 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1880 npeel /= GROUP_SIZE (stmt_info);
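/* For example (an illustration, not from the sources): if the group size
   is 4 and the element count computed above is npeel = 8, each scalar
   iteration covers one whole group of 4 elements, so only 8 / 4 = 2
   scalar iterations need to be peeled.  */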
1882 if (dump_kind_p (MSG_NOTE))
1883 dump_printf_loc (MSG_NOTE, vect_location,
1884 "Try peeling by %d", npeel);
1887 /* Ensure that all data refs can be vectorized after the peel. */
1888 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1890 int save_misalignment;
1892 if (dr == dr0)
1893 continue;
1895 stmt = DR_STMT (dr);
1896 stmt_info = vinfo_for_stmt (stmt);
1897 /* For interleaving, only the alignment of the first access
1898 matters. */
1899 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1900 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1901 continue;
1903 /* Strided loads perform only component accesses, alignment is
1904 irrelevant for them. */
1905 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1906 continue;
1908 save_misalignment = DR_MISALIGNMENT (dr);
1909 vect_update_misalignment_for_peel (dr, dr0, npeel);
1910 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1911 SET_DR_MISALIGNMENT (dr, save_misalignment);
1913 if (!supportable_dr_alignment)
1915 do_peeling = false;
1916 break;
1920 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1922 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1923 if (!stat)
1924 do_peeling = false;
1925 else
1927 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1928 return stat;
1932 if (do_peeling)
1934 stmt_info_for_cost *si;
1935 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
1937 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1938 If the misalignment of DR_i is identical to that of dr0 then set
1939 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1940 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1941 by the peeling factor times the element size of DR_i (MOD the
1942 vectorization factor times the size). Otherwise, the
1943 misalignment of DR_i must be set to unknown. */
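/* A hypothetical numeric example of the update described above: assume
   vf = 4 and 4-byte elements, DR0 misaligned by 8 bytes and DR_i by 12,
   and npeel = 2.  Then DR0 becomes aligned ((8 + 2 * 4) mod 16 == 0) and
   DR_MISALIGNMENT (DR_i) becomes (12 + 2 * 4) mod 16 = 4.  */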
1944 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1945 if (dr != dr0)
1946 vect_update_misalignment_for_peel (dr, dr0, npeel);
1948 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1949 if (npeel)
1950 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1951 else
1952 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1953 SET_DR_MISALIGNMENT (dr0, 0);
1954 if (dump_kind_p (MSG_NOTE))
1956 dump_printf_loc (MSG_NOTE, vect_location,
1957 "Alignment of access forced using peeling.");
1958 dump_printf_loc (MSG_NOTE, vect_location,
1959 "Peeling for alignment will be applied.");
1961 /* We've delayed passing the inside-loop peeling costs to the
1962 target cost model until we were sure peeling would happen.
1963 Do so now. */
1964 if (body_cost_vec)
1966 FOR_EACH_VEC_ELT (stmt_info_for_cost, body_cost_vec, i, si)
1968 struct _stmt_vec_info *stmt_info
1969 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1970 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1971 si->misalign, vect_body);
1973 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1976 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1977 gcc_assert (stat);
1978 return stat;
1982 VEC_free (stmt_info_for_cost, heap, body_cost_vec);
1984 /* (2) Versioning to force alignment. */
1986 /* Try versioning if:
1987 1) flag_tree_vect_loop_version is TRUE
1988 2) optimize loop for speed
1989 3) there is at least one unsupported misaligned data ref with an unknown
1990 misalignment, and
1991 4) all misaligned data refs with a known misalignment are supported, and
1992 5) the number of runtime alignment checks is within reason. */
1994 do_versioning =
1995 flag_tree_vect_loop_version
1996 && optimize_loop_nest_for_speed_p (loop)
1997 && (!loop->inner); /* FORNOW */
1999 if (do_versioning)
2001 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2003 stmt = DR_STMT (dr);
2004 stmt_info = vinfo_for_stmt (stmt);
2006 /* For interleaving, only the alignment of the first access
2007 matters. */
2008 if (aligned_access_p (dr)
2009 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
2010 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
2011 continue;
2013 /* Strided loads perform only component accesses, alignment is
2014 irrelevant for them. */
2015 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
2016 continue;
2018 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
2020 if (!supportable_dr_alignment)
2022 gimple stmt;
2023 int mask;
2024 tree vectype;
2026 if (known_alignment_for_access_p (dr)
2027 || VEC_length (gimple,
2028 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
2029 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2031 do_versioning = false;
2032 break;
2035 stmt = DR_STMT (dr);
2036 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2037 gcc_assert (vectype);
2039 /* The rightmost bits of an aligned address must be zeros.
2040 Construct the mask needed for this test. For example,
2041 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2042 mask must be 15 = 0xf. */
2043 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
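/* As a sketch of the intent (an assumption about the code generated
   later, not shown here): the versioning test effectively checks
   ((addr_1 | addr_2 | ... | addr_n) & mask) == 0, i.e. that every
   potentially misaligned address has its low bits clear.  */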
2045 /* FORNOW: use the same mask to test all potentially unaligned
2046 references in the loop. The vectorizer currently supports
2047 a single vector size, see the reference to
2048 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2049 vectorization factor is computed. */
2050 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2051 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2052 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2053 VEC_safe_push (gimple, heap,
2054 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2055 DR_STMT (dr));
2059 /* Versioning requires at least one misaligned data reference. */
2060 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2061 do_versioning = false;
2062 else if (!do_versioning)
2063 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2066 if (do_versioning)
2068 VEC(gimple,heap) *may_misalign_stmts
2069 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2070 gimple stmt;
2072 /* It can now be assumed that the data references in the statements
2073 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2074 of the loop being vectorized. */
2075 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
2077 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2078 dr = STMT_VINFO_DATA_REF (stmt_info);
2079 SET_DR_MISALIGNMENT (dr, 0);
2080 if (dump_kind_p (MSG_NOTE))
2081 dump_printf_loc (MSG_NOTE, vect_location,
2082 "Alignment of access forced using versioning.");
2085 if (dump_kind_p (MSG_NOTE))
2086 dump_printf_loc (MSG_NOTE, vect_location,
2087 "Versioning for alignment will be applied.");
2089 /* Peeling and versioning can't be done together at this time. */
2090 gcc_assert (! (do_peeling && do_versioning));
2092 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2093 gcc_assert (stat);
2094 return stat;
2097 /* This point is reached if neither peeling nor versioning is being done. */
2098 gcc_assert (! (do_peeling || do_versioning));
2100 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
2101 return stat;
2105 /* Function vect_find_same_alignment_drs.
2107 Update group and alignment relations according to the chosen
2108 vectorization factor. */
2110 static void
2111 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
2112 loop_vec_info loop_vinfo)
2114 unsigned int i;
2115 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2116 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2117 struct data_reference *dra = DDR_A (ddr);
2118 struct data_reference *drb = DDR_B (ddr);
2119 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2120 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2121 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
2122 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
2123 lambda_vector dist_v;
2124 unsigned int loop_depth;
2126 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2127 return;
2129 if (dra == drb)
2130 return;
2132 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2133 return;
2135 /* Loop-based vectorization and known data dependence. */
2136 if (DDR_NUM_DIST_VECTS (ddr) == 0)
2137 return;
2139 /* Data-dependence analysis reports a distance vector of zero
2140 for data-references that overlap only in the first iteration
2141 but have steps of different sign (see PR45764).
2142 So as a sanity check require equal DR_STEP. */
2143 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2144 return;
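/* For instance (illustrative numbers only): with a vectorization factor
   of 8 and 4-byte elements on both sides, the references a[i] and a[i + 8]
   have a dependence distance of 8; 8 % 8 == 0 and the element sizes and
   steps match, so the two drs are recorded below as having the same
   alignment.  */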
2146 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
2147 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
2149 int dist = dist_v[loop_depth];
2151 if (dump_kind_p (MSG_NOTE))
2152 dump_printf_loc (MSG_NOTE, vect_location,
2153 "dependence distance = %d.", dist);
2155 /* Same loop iteration. */
2156 if (dist == 0
2157 || (dist % vectorization_factor == 0 && dra_size == drb_size))
2159 /* Two references with distance zero have the same alignment. */
2160 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
2161 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
2162 if (dump_kind_p (MSG_NOTE))
2164 dump_printf_loc (MSG_NOTE, vect_location,
2165 "accesses have the same alignment.");
2166 dump_printf (MSG_NOTE,
2167 "dependence distance modulo vf == 0 between ");
2168 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2169 dump_printf (MSG_NOTE, " and ");
2170 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
2177 /* Function vect_analyze_data_refs_alignment
2179 Analyze the alignment of the data-references in the loop.
2180 Return FALSE if a data reference is found that cannot be vectorized. */
2182 bool
2183 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2184 bb_vec_info bb_vinfo)
2186 if (dump_kind_p (MSG_NOTE))
2187 dump_printf_loc (MSG_NOTE, vect_location,
2188 "=== vect_analyze_data_refs_alignment ===");
2190 /* Mark groups of data references with same alignment using
2191 data dependence information. */
2192 if (loop_vinfo)
2194 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2195 struct data_dependence_relation *ddr;
2196 unsigned int i;
2198 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2199 vect_find_same_alignment_drs (ddr, loop_vinfo);
2202 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2204 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2206 "not vectorized: can't calculate alignment "
2207 "for data ref.");
2208 return false;
2211 return true;
2215 /* Analyze groups of accesses: check that DR belongs to a group of
2216 accesses of legal size, step, etc. Detect gaps, single element
2217 interleaving, and other special cases. Set grouped access info.
2218 Collect groups of strided stores for further use in SLP analysis. */
2220 static bool
2221 vect_analyze_group_access (struct data_reference *dr)
2223 tree step = DR_STEP (dr);
2224 tree scalar_type = TREE_TYPE (DR_REF (dr));
2225 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2226 gimple stmt = DR_STMT (dr);
2227 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2228 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2229 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2230 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2231 HOST_WIDE_INT groupsize, last_accessed_element = 1;
2232 bool slp_impossible = false;
2233 struct loop *loop = NULL;
2235 if (loop_vinfo)
2236 loop = LOOP_VINFO_LOOP (loop_vinfo);
2238 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2239 size of the interleaving group (including gaps). */
2240 groupsize = dr_step / type_size;
2242 /* A non-consecutive access is possible only if it is part of interleaving. */
2243 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2245 /* Check if this DR is part of an interleaving group, and is a single
2246 element of the group that is accessed in the loop. */
2248 /* Gaps are supported only for loads. STEP must be a multiple of the type
2249 size. The size of the group must be a power of 2. */
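/* An illustrative instance of this case (assumed, not from the original
   comment):  for (i = 0; i < N; i++) sum += a[2 * i];  — the single load
   has step = 2 * type_size, so groupsize = 2, and the element a[2 * i + 1]
   is the gap that is never accessed.  */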
2250 if (DR_IS_READ (dr)
2251 && (dr_step % type_size) == 0
2252 && groupsize > 0
2253 && exact_log2 (groupsize) != -1)
2255 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2256 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2257 if (dump_kind_p (MSG_NOTE))
2259 dump_printf_loc (MSG_NOTE, vect_location,
2260 "Detected single element interleaving ");
2261 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2262 dump_printf (MSG_NOTE, " step ");
2263 dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
2266 if (loop_vinfo)
2268 if (dump_kind_p (MSG_NOTE))
2269 dump_printf_loc (MSG_NOTE, vect_location,
2270 "Data access with gaps requires scalar "
2271 "epilogue loop");
2272 if (loop->inner)
2274 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2276 "Peeling for outer loop is not"
2277 " supported");
2278 return false;
2281 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2284 return true;
2287 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2290 "not consecutive access ");
2291 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2294 if (bb_vinfo)
2296 /* Mark the statement as unvectorizable. */
2297 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2298 return true;
2301 return false;
2304 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2306 /* First stmt in the interleaving chain. Check the chain. */
2307 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2308 struct data_reference *data_ref = dr;
2309 unsigned int count = 1;
2310 tree next_step;
2311 tree prev_init = DR_INIT (data_ref);
2312 gimple prev = stmt;
2313 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2315 while (next)
2317 /* Skip same data-refs. In case two or more stmts share a
2318 data-ref (supported only for loads), we vectorize only the first
2319 stmt, and the rest get their vectorized loads from the first
2320 one. */
2321 if (!tree_int_cst_compare (DR_INIT (data_ref),
2322 DR_INIT (STMT_VINFO_DATA_REF (
2323 vinfo_for_stmt (next)))))
2325 if (DR_IS_WRITE (data_ref))
2327 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2329 "Two store stmts share the same dr.");
2330 return false;
2333 /* Check that there are no load-store dependences for these loads,
2334 to prevent a load-store-load sequence to the same location. */
2335 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2336 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2338 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2340 "READ_WRITE dependence in interleaving.");
2341 return false;
2344 /* For this load, reuse the data-ref load of the earlier stmt. */
2345 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2347 prev = next;
2348 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2349 continue;
2352 prev = next;
2354 /* Check that all the accesses have the same STEP. */
2355 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2356 if (tree_int_cst_compare (step, next_step))
2358 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2360 "not consecutive access in interleaving");
2361 return false;
2364 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2365 /* Check that the distance between two accesses is equal to the type
2366 size. Otherwise, we have gaps. */
2367 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2368 - TREE_INT_CST_LOW (prev_init)) / type_size;
2369 if (diff != 1)
2371 /* FORNOW: SLP of accesses with gaps is not supported. */
2372 slp_impossible = true;
2373 if (DR_IS_WRITE (data_ref))
2375 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377 "interleaved store with gaps");
2378 return false;
2381 gaps += diff - 1;
2384 last_accessed_element += diff;
2386 /* Store the gap from the previous member of the group. If there is no
2387 gap in the access, GROUP_GAP is always 1. */
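/* Illustration (not from the sources): for a group loading a[3 * i] and
   a[3 * i + 2], the second member has diff = 2 relative to the first, so
   its GROUP_GAP is set to 2 and one element (a[3 * i + 1]) is skipped.  */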
2388 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2390 prev_init = DR_INIT (data_ref);
2391 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2392 /* Count the number of data-refs in the chain. */
2393 count++;
2396 /* COUNT is the number of accesses found, we multiply it by the size of
2397 the type to get COUNT_IN_BYTES. */
2398 count_in_bytes = type_size * count;
2400 /* Check that the size of the interleaving (including gaps) is not
2401 greater than STEP. */
2402 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2404 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "interleaving size is greater than step for ");
2408 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dr));
2410 return false;
2413 /* Check that the size of the interleaving is equal to STEP for stores,
2414 i.e., that there are no gaps. */
2415 if (dr_step && dr_step != count_in_bytes)
2417 if (DR_IS_READ (dr))
2419 slp_impossible = true;
2420 /* There is a gap after the last load in the group. This gap is a
2421 difference between the groupsize and the number of elements.
2422 When there is no gap, this difference should be 0. */
2423 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
2425 else
2427 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "interleaved store with gaps");
2430 return false;
2434 /* Check that STEP is a multiple of type size. */
2435 if (dr_step && (dr_step % type_size) != 0)
2437 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2440 "step is not a multiple of type size: step ");
2441 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
2442 dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
2443 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2444 TYPE_SIZE_UNIT (scalar_type));
2446 return false;
2449 if (groupsize == 0)
2450 groupsize = count;
2452 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2453 if (dump_kind_p (MSG_NOTE))
2454 dump_printf_loc (MSG_NOTE, vect_location,
2455 "Detected interleaving of size %d", (int)groupsize);
2457 /* SLP: create an SLP data structure for every interleaving group of
2458 stores for further analysis in vect_analyse_slp. */
2459 if (DR_IS_WRITE (dr) && !slp_impossible)
2461 if (loop_vinfo)
2462 VEC_safe_push (gimple, heap, LOOP_VINFO_GROUPED_STORES (loop_vinfo),
2463 stmt);
2464 if (bb_vinfo)
2465 VEC_safe_push (gimple, heap, BB_VINFO_GROUPED_STORES (bb_vinfo),
2466 stmt);
2469 /* There is a gap at the end of the group. */
2470 if (groupsize - last_accessed_element > 0 && loop_vinfo)
2472 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2474 "Data access with gaps requires scalar "
2475 "epilogue loop");
2476 if (loop->inner)
2478 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2480 "Peeling for outer loop is not supported");
2481 return false;
2484 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2488 return true;
2492 /* Analyze the access pattern of the data-reference DR.
2493 In case of non-consecutive accesses call vect_analyze_group_access() to
2494 analyze groups of accesses. */
2496 static bool
2497 vect_analyze_data_ref_access (struct data_reference *dr)
2499 tree step = DR_STEP (dr);
2500 tree scalar_type = TREE_TYPE (DR_REF (dr));
2501 gimple stmt = DR_STMT (dr);
2502 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2503 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2504 struct loop *loop = NULL;
2506 if (loop_vinfo)
2507 loop = LOOP_VINFO_LOOP (loop_vinfo);
2509 if (loop_vinfo && !step)
2511 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2513 "bad data-ref access in loop");
2514 return false;
2517 /* Allow invariant loads in loops. */
2518 if (loop_vinfo && integer_zerop (step))
2520 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2521 return DR_IS_READ (dr);
2524 if (loop && nested_in_vect_loop_p (loop, stmt))
2526 /* Interleaved accesses are not yet supported within outer-loop
2527 vectorization for references in the inner-loop. */
2528 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2530 /* For the rest of the analysis we use the outer-loop step. */
2531 step = STMT_VINFO_DR_STEP (stmt_info);
2532 if (integer_zerop (step))
2534 if (dump_kind_p (MSG_NOTE))
2535 dump_printf_loc (MSG_NOTE, vect_location,
2536 "zero step in outer loop.");
2537 if (DR_IS_READ (dr))
2538 return true;
2539 else
2540 return false;
2544 /* Consecutive? */
2545 if (TREE_CODE (step) == INTEGER_CST)
2547 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2548 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2549 || (dr_step < 0
2550 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2552 /* Mark that it is not interleaving. */
2553 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2554 return true;
2558 if (loop && nested_in_vect_loop_p (loop, stmt))
2560 if (dump_kind_p (MSG_NOTE))
2561 dump_printf_loc (MSG_NOTE, vect_location,
2562 "grouped access in outer loop.");
2563 return false;
2566 /* Assume this is a DR handled by the non-constant strided load case. */
2567 if (TREE_CODE (step) != INTEGER_CST)
2568 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2570 /* Non-consecutive access - check if it's part of an interleaving group. */
2571 return vect_analyze_group_access (dr);
2575 /* Function vect_analyze_data_ref_accesses.
2577 Analyze the access pattern of all the data references in the loop.
2579 FORNOW: the only access pattern that is considered vectorizable is a
2580 simple step 1 (consecutive) access.
2582 FORNOW: handle only arrays and pointer accesses. */
2584 bool
2585 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2587 unsigned int i;
2588 VEC (data_reference_p, heap) *datarefs;
2589 struct data_reference *dr;
2591 if (dump_kind_p (MSG_NOTE))
2592 dump_printf_loc (MSG_NOTE, vect_location,
2593 "=== vect_analyze_data_ref_accesses ===");
2595 if (loop_vinfo)
2596 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2597 else
2598 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2600 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2601 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2602 && !vect_analyze_data_ref_access (dr))
2604 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2605 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2606 "not vectorized: complicated access pattern.");
2608 if (bb_vinfo)
2610 /* Mark the statement as not vectorizable. */
2611 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2612 continue;
2614 else
2615 return false;
2618 return true;
2621 /* Function vect_prune_runtime_alias_test_list.
2623 Prune a list of ddrs to be tested at run-time by versioning for alias.
2624 Return FALSE if the resulting list of ddrs is longer than allowed by
2625 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2627 bool
2628 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2630 VEC (ddr_p, heap) * ddrs =
2631 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2632 unsigned i, j;
2634 if (dump_kind_p (MSG_NOTE))
2635 dump_printf_loc (MSG_NOTE, vect_location,
2636 "=== vect_prune_runtime_alias_test_list ===");
2638 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2640 bool found;
2641 ddr_p ddr_i;
2643 ddr_i = VEC_index (ddr_p, ddrs, i);
2644 found = false;
2646 for (j = 0; j < i; j++)
2648 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2650 if (vect_vfa_range_equal (ddr_i, ddr_j))
2652 if (dump_kind_p (MSG_NOTE))
2654 dump_printf_loc (MSG_NOTE, vect_location,
2655 "found equal ranges ");
2656 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr_i)));
2657 dump_printf (MSG_NOTE, ", ");
2658 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr_i)));
2659 dump_printf (MSG_NOTE, " and ");
2660 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr_j)));
2661 dump_printf (MSG_NOTE, ", ");
2662 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr_j)));
2664 found = true;
2665 break;
2669 if (found)
2671 VEC_ordered_remove (ddr_p, ddrs, i);
2672 continue;
2674 i++;
2677 if (VEC_length (ddr_p, ddrs) >
2678 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2680 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2683 "disable versioning for alias - max number of "
2684 "generated checks exceeded.");
2687 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2689 return false;
2692 return true;
2695 /* Check whether a non-affine read in STMT is suitable for a gather load
2696 and if so, return a builtin decl for that operation. */
2698 tree
2699 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2700 tree *offp, int *scalep)
2702 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2703 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2705 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2706 tree offtype = NULL_TREE;
2707 tree decl, base, off;
2708 enum machine_mode pmode;
2709 int punsignedp, pvolatilep;
2711 /* The gather builtins need an address of the form
2712 loop_invariant + vector * {1, 2, 4, 8}
2713 or
2714 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2715 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2716 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2717 multiplications and additions in it. To get a vector, we need
2718 a single SSA_NAME that will be defined in the loop and will
2719 contain everything that is not loop invariant and that can be
2720 vectorized. The following code attempts to find such a preexisting
2721 SSA_NAME OFF and put the loop invariants into a tree BASE
2722 that can be gimplified before the loop. */
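/* As an illustrative example (assumed, not part of the original comment):
   in  for (i = 0; i < n; i++) sum += data[idx[i]];  the loop-invariant
   address of data[0] would end up in BASE, the SSA_NAME holding the
   in-loop value idx[i] in OFF, and the element size of data[] would
   typically end up as the scale.  */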
2723 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2724 &pmode, &punsignedp, &pvolatilep, false);
2725 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2727 if (TREE_CODE (base) == MEM_REF)
2729 if (!integer_zerop (TREE_OPERAND (base, 1)))
2731 if (off == NULL_TREE)
2733 double_int moff = mem_ref_offset (base);
2734 off = double_int_to_tree (sizetype, moff);
2736 else
2737 off = size_binop (PLUS_EXPR, off,
2738 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2740 base = TREE_OPERAND (base, 0);
2742 else
2743 base = build_fold_addr_expr (base);
2745 if (off == NULL_TREE)
2746 off = size_zero_node;
2748 /* If BASE is not loop invariant then, if OFF is 0, we start with just
2749 the constant offset in the loop-invariant BASE and continue with BASE
2750 as OFF; otherwise give up.
2751 We could handle that case by gimplifying the addition of base + off
2752 into some SSA_NAME and use that as off, but for now punt. */
2753 if (!expr_invariant_in_loop_p (loop, base))
2755 if (!integer_zerop (off))
2756 return NULL_TREE;
2757 off = base;
2758 base = size_int (pbitpos / BITS_PER_UNIT);
2760 /* Otherwise put base + constant offset into the loop invariant BASE
2761 and continue with OFF. */
2762 else
2764 base = fold_convert (sizetype, base);
2765 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2768 /* OFF at this point may be either an SSA_NAME or some tree expression
2769 from get_inner_reference. Try to peel off loop invariants from it
2770 into BASE as long as possible. */
2771 STRIP_NOPS (off);
2772 while (offtype == NULL_TREE)
2774 enum tree_code code;
2775 tree op0, op1, add = NULL_TREE;
2777 if (TREE_CODE (off) == SSA_NAME)
2779 gimple def_stmt = SSA_NAME_DEF_STMT (off);
2781 if (expr_invariant_in_loop_p (loop, off))
2782 return NULL_TREE;
2784 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2785 break;
2787 op0 = gimple_assign_rhs1 (def_stmt);
2788 code = gimple_assign_rhs_code (def_stmt);
2789 op1 = gimple_assign_rhs2 (def_stmt);
2791 else
2793 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2794 return NULL_TREE;
2795 code = TREE_CODE (off);
2796 extract_ops_from_tree (off, &code, &op0, &op1);
2798 switch (code)
2800 case POINTER_PLUS_EXPR:
2801 case PLUS_EXPR:
2802 if (expr_invariant_in_loop_p (loop, op0))
2804 add = op0;
2805 off = op1;
2806 do_add:
2807 add = fold_convert (sizetype, add);
2808 if (scale != 1)
2809 add = size_binop (MULT_EXPR, add, size_int (scale));
2810 base = size_binop (PLUS_EXPR, base, add);
2811 continue;
2813 if (expr_invariant_in_loop_p (loop, op1))
2815 add = op1;
2816 off = op0;
2817 goto do_add;
2819 break;
2820 case MINUS_EXPR:
2821 if (expr_invariant_in_loop_p (loop, op1))
2823 add = fold_convert (sizetype, op1);
2824 add = size_binop (MINUS_EXPR, size_zero_node, add);
2825 off = op0;
2826 goto do_add;
2828 break;
2829 case MULT_EXPR:
2830 if (scale == 1 && host_integerp (op1, 0))
2832 scale = tree_low_cst (op1, 0);
2833 off = op0;
2834 continue;
2836 break;
2837 case SSA_NAME:
2838 off = op0;
2839 continue;
2840 CASE_CONVERT:
2841 if (!POINTER_TYPE_P (TREE_TYPE (op0))
2842 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2843 break;
2844 if (TYPE_PRECISION (TREE_TYPE (op0))
2845 == TYPE_PRECISION (TREE_TYPE (off)))
2847 off = op0;
2848 continue;
2850 if (TYPE_PRECISION (TREE_TYPE (op0))
2851 < TYPE_PRECISION (TREE_TYPE (off)))
2853 off = op0;
2854 offtype = TREE_TYPE (off);
2855 STRIP_NOPS (off);
2856 continue;
2858 break;
2859 default:
2860 break;
2862 break;
2865 /* If at the end OFF still isn't an SSA_NAME or isn't
2866 defined in the loop, punt. */
2867 if (TREE_CODE (off) != SSA_NAME
2868 || expr_invariant_in_loop_p (loop, off))
2869 return NULL_TREE;
2871 if (offtype == NULL_TREE)
2872 offtype = TREE_TYPE (off);
2874 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2875 offtype, scale);
2876 if (decl == NULL_TREE)
2877 return NULL_TREE;
2879 if (basep)
2880 *basep = base;
2881 if (offp)
2882 *offp = off;
2883 if (scalep)
2884 *scalep = scale;
2885 return decl;
2888 /* Check whether a non-affine load in STMT (being in the loop referred to
2889 in LOOP_VINFO) is suitable for handling as strided load. That is the case
2890 if its address is a simple induction variable. If so return the base
2891 of that induction variable in *BASEP and the (loop-invariant) step
2892 in *STEPP, both only when that pointer is non-zero.
2894 This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
2895 base pointer) only. */
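/* For illustration (an assumed example):  in
   for (i = 0; i < n; i++) x += a[i * stride];  with a loop-invariant
   'stride' that is not a compile-time constant, the index i * stride is a
   simple induction variable, so *BASEP would receive its initial value and
   *STEPP the loop-invariant 'stride' (counted in elements for the
   ARRAY_REF case).  */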
2897 bool
2898 vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2899 tree *stepp)
2901 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2902 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2903 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2904 tree base, off;
2905 affine_iv iv;
2907 if (!DR_IS_READ (dr))
2908 return false;
2910 base = DR_REF (dr);
2912 if (TREE_CODE (base) == ARRAY_REF)
2914 off = TREE_OPERAND (base, 1);
2915 base = TREE_OPERAND (base, 0);
2917 else if (TREE_CODE (base) == MEM_REF)
2919 off = TREE_OPERAND (base, 0);
2920 base = TREE_OPERAND (base, 1);
2922 else
2923 return false;
2925 if (TREE_CODE (off) != SSA_NAME)
2926 return false;
2928 if (!expr_invariant_in_loop_p (loop, base)
2929 || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
2930 return false;
2932 if (basep)
2933 *basep = iv.base;
2934 if (stepp)
2935 *stepp = iv.step;
2936 return true;
2939 /* Function vect_analyze_data_refs.
2941 Find all the data references in the loop or basic block.
2943 The general structure of the analysis of data refs in the vectorizer is as
2944 follows:
2945 1- vect_analyze_data_refs(loop/bb): call
2946 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2947 in the loop/bb and their dependences.
2948 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2949 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2950 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2954 bool
2955 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2956 bb_vec_info bb_vinfo,
2957 int *min_vf)
2959 struct loop *loop = NULL;
2960 basic_block bb = NULL;
2961 unsigned int i;
2962 VEC (data_reference_p, heap) *datarefs;
2963 struct data_reference *dr;
2964 tree scalar_type;
2965 bool res, stop_bb_analysis = false;
2967 if (dump_kind_p (MSG_NOTE))
2968 dump_printf_loc (MSG_NOTE, vect_location,
2969 "=== vect_analyze_data_refs ===\n");
2971 if (loop_vinfo)
2973 loop = LOOP_VINFO_LOOP (loop_vinfo);
2974 res = compute_data_dependences_for_loop
2975 (loop, true,
2976 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2977 &LOOP_VINFO_DATAREFS (loop_vinfo),
2978 &LOOP_VINFO_DDRS (loop_vinfo));
2980 if (!res)
2982 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
2983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2984 "not vectorized: loop contains function calls"
2985 " or data references that cannot be analyzed");
2986 return false;
2989 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2991 else
2993 gimple_stmt_iterator gsi;
2995 bb = BB_VINFO_BB (bb_vinfo);
2996 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2998 gimple stmt = gsi_stmt (gsi);
2999 if (!find_data_references_in_stmt (NULL, stmt,
3000 &BB_VINFO_DATAREFS (bb_vinfo)))
3002 /* Mark the rest of the basic-block as unvectorizable. */
3003 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3005 stmt = gsi_stmt (gsi);
3006 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
3008 break;
3011 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
3012 &BB_VINFO_DDRS (bb_vinfo), NULL, true))
3014 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3015 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3016 "not vectorized: basic block contains function"
3017 " calls or data references that cannot be"
3018 " analyzed");
3019 return false;
3022 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
3025 /* Go through the data-refs, check that the analysis succeeded. Update
3026 pointer from stmt_vec_info struct to DR and vectype. */
3028 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
3030 gimple stmt;
3031 stmt_vec_info stmt_info;
3032 tree base, offset, init;
3033 bool gather = false;
3034 int vf;
3036 if (!dr || !DR_REF (dr))
3038 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3040 "not vectorized: unhandled data-ref ");
3041 return false;
3044 stmt = DR_STMT (dr);
3045 stmt_info = vinfo_for_stmt (stmt);
3047 if (stop_bb_analysis)
3049 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3050 continue;
3053 /* Check that analysis of the data-ref succeeded. */
3054 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3055 || !DR_STEP (dr))
3057 /* If the target supports vector gather loads, see if they can
3058 be used. */
3059 if (loop_vinfo
3060 && DR_IS_READ (dr)
3061 && !TREE_THIS_VOLATILE (DR_REF (dr))
3062 && targetm.vectorize.builtin_gather != NULL
3063 && !nested_in_vect_loop_p (loop, stmt))
3065 struct data_reference *newdr
3066 = create_data_ref (NULL, loop_containing_stmt (stmt),
3067 DR_REF (dr), stmt, true);
3068 gcc_assert (newdr != NULL && DR_REF (newdr));
3069 if (DR_BASE_ADDRESS (newdr)
3070 && DR_OFFSET (newdr)
3071 && DR_INIT (newdr)
3072 && DR_STEP (newdr)
3073 && integer_zerop (DR_STEP (newdr)))
3075 dr = newdr;
3076 gather = true;
3078 else
3079 free_data_ref (newdr);
3082 if (!gather)
3084 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3086 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3087 "not vectorized: data ref analysis "
3088 "failed ");
3089 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3092 if (bb_vinfo)
3094 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3095 stop_bb_analysis = true;
3096 continue;
3099 return false;
3103 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3105 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3106 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3107 "not vectorized: base addr of dr is a "
3108 "constant");
3110 if (bb_vinfo)
3112 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3113 stop_bb_analysis = true;
3114 continue;
3117 if (gather)
3118 free_data_ref (dr);
3119 return false;
3122 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3124 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3127 "not vectorized: volatile type ");
3128 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3131 if (bb_vinfo)
3133 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3134 stop_bb_analysis = true;
3135 continue;
3138 return false;
3141 if (stmt_can_throw_internal (stmt))
3143 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3146 "not vectorized: statement can throw an "
3147 "exception ");
3148 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3151 if (bb_vinfo)
3153 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3154 stop_bb_analysis = true;
3155 continue;
3158 if (gather)
3159 free_data_ref (dr);
3160 return false;
3163 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3164 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3166 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3169 "not vectorized: statement is bitfield "
3170 "access ");
3171 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3174 if (bb_vinfo)
3176 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3177 stop_bb_analysis = true;
3178 continue;
3181 if (gather)
3182 free_data_ref (dr);
3183 return false;
3186 base = unshare_expr (DR_BASE_ADDRESS (dr));
3187 offset = unshare_expr (DR_OFFSET (dr));
3188 init = unshare_expr (DR_INIT (dr));
3190 if (is_gimple_call (stmt))
3192 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3194 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3195 "not vectorized: dr in a call ");
3196 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3199 if (bb_vinfo)
3201 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3202 stop_bb_analysis = true;
3203 continue;
3206 if (gather)
3207 free_data_ref (dr);
3208 return false;
3211 /* Update DR field in stmt_vec_info struct. */
3213 /* If the dataref is in an inner-loop of the loop that is considered
3214 for vectorization, we also want to analyze the access relative to
3215 the outer-loop (DR contains information only relative to the
3216 inner-most enclosing loop). We do that by building a reference to the
3217 first location accessed by the inner-loop, and analyze it relative to
3218 the outer-loop. */
3219 if (loop && nested_in_vect_loop_p (loop, stmt))
3221 tree outer_step, outer_base, outer_init;
3222 HOST_WIDE_INT pbitsize, pbitpos;
3223 tree poffset;
3224 enum machine_mode pmode;
3225 int punsignedp, pvolatilep;
3226 affine_iv base_iv, offset_iv;
3227 tree dinit;
3229 /* Build a reference to the first location accessed by the
3230 inner-loop: *(BASE+INIT). (The first location is actually
3231 BASE+INIT+OFFSET, but we add OFFSET separately later). */
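/* An illustrative case (an assumption, mirroring the in[i+j] example used
   later in this file, where 'in' points to short): analyzing the inner
   reference relative to the outer i-loop yields a step of 2 bytes (one
   short) per outer iteration, and that is the kind of value that ends up
   in STMT_VINFO_DR_STEP below.  */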
3232 tree inner_base = build_fold_indirect_ref
3233 (fold_build_pointer_plus (base, init));
3235 if (dump_kind_p (MSG_NOTE))
3237 dump_printf_loc (MSG_NOTE, vect_location,
3238 "analyze in outer-loop: ");
3239 dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
3242 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3243 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3244 gcc_assert (outer_base != NULL_TREE);
3246 if (pbitpos % BITS_PER_UNIT != 0)
3248 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3250 "failed: bit offset alignment.\n");
3251 return false;
3254 outer_base = build_fold_addr_expr (outer_base);
3255 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
3256 &base_iv, false))
3258 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3259 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3260 "failed: evolution of base is not affine.\n");
3261 return false;
3264 if (offset)
3266 if (poffset)
3267 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
3268 poffset);
3269 else
3270 poffset = offset;
3273 if (!poffset)
3275 offset_iv.base = ssize_int (0);
3276 offset_iv.step = ssize_int (0);
3278 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
3279 &offset_iv, false))
3281 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3283 "evolution of offset is not affine.\n");
3284 return false;
3287 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3288 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3289 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3290 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3291 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3293 outer_step = size_binop (PLUS_EXPR,
3294 fold_convert (ssizetype, base_iv.step),
3295 fold_convert (ssizetype, offset_iv.step));
3297 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3298 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3299 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3300 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3301 STMT_VINFO_DR_OFFSET (stmt_info) =
3302 fold_convert (ssizetype, offset_iv.base);
3303 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3304 size_int (highest_pow2_factor (offset_iv.base));
3306 if (dump_kind_p (MSG_NOTE))
3308 dump_printf_loc (MSG_NOTE, vect_location,
3309 "\touter base_address: ");
3310 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3311 STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3312 dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
3313 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3314 STMT_VINFO_DR_OFFSET (stmt_info));
3315 dump_printf (MSG_NOTE,
3316 "\n\touter constant offset from base address: ");
3317 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3318 STMT_VINFO_DR_INIT (stmt_info));
3319 dump_printf (MSG_NOTE, "\n\touter step: ");
3320 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3321 STMT_VINFO_DR_STEP (stmt_info));
3322 dump_printf (MSG_NOTE, "\n\touter aligned to: ");
3323 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3324 STMT_VINFO_DR_ALIGNED_TO (stmt_info));
3328 if (STMT_VINFO_DATA_REF (stmt_info))
3330 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3333 "not vectorized: more than one data ref "
3334 "in stmt: ");
3335 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3338 if (bb_vinfo)
3340 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3341 stop_bb_analysis = true;
3342 continue;
3345 if (gather)
3346 free_data_ref (dr);
3347 return false;
3350 STMT_VINFO_DATA_REF (stmt_info) = dr;
3352 /* Set vectype for STMT. */
3353 scalar_type = TREE_TYPE (DR_REF (dr));
3354 STMT_VINFO_VECTYPE (stmt_info) =
3355 get_vectype_for_scalar_type (scalar_type);
3356 if (!STMT_VINFO_VECTYPE (stmt_info))
3358 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3361 "not vectorized: no vectype for stmt: ");
3362 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3363 dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
3364 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
3365 scalar_type);
3368 if (bb_vinfo)
3370 /* Mark the statement as not vectorizable. */
3371 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3372 stop_bb_analysis = true;
3373 continue;
3376 if (gather)
3378 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3379 free_data_ref (dr);
3381 return false;
3384 /* Adjust the minimal vectorization factor according to the
3385 vector type. */
3386 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3387 if (vf > *min_vf)
3388 *min_vf = vf;
3390 if (gather)
3392 unsigned int j, k, n;
3393 struct data_reference *olddr
3394 = VEC_index (data_reference_p, datarefs, i);
3395 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3396 struct data_dependence_relation *ddr, *newddr;
3397 bool bad = false;
3398 tree off;
3399 VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3401 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3402 if (gather
3403 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3404 gather = false;
3405 if (!gather)
3407 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3408 free_data_ref (dr);
3409 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3412 "not vectorized: not suitable for gather "
3413 "load ");
3414 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3416 return false;
3419 n = VEC_length (data_reference_p, datarefs) - 1;
3420 for (j = 0, k = i - 1; j < i; j++)
3422 ddr = VEC_index (ddr_p, ddrs, k);
3423 gcc_assert (DDR_B (ddr) == olddr);
3424 newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3425 nest);
3426 VEC_replace (ddr_p, ddrs, k, newddr);
3427 free_dependence_relation (ddr);
3428 if (!bad
3429 && DR_IS_WRITE (DDR_A (newddr))
3430 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3431 bad = true;
3432 k += --n;
3435 k++;
3436 n = k + VEC_length (data_reference_p, datarefs) - i - 1;
3437 for (; k < n; k++)
3439 ddr = VEC_index (ddr_p, ddrs, k);
3440 gcc_assert (DDR_A (ddr) == olddr);
3441 newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3442 nest);
3443 VEC_replace (ddr_p, ddrs, k, newddr);
3444 free_dependence_relation (ddr);
3445 if (!bad
3446 && DR_IS_WRITE (DDR_B (newddr))
3447 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3448 bad = true;
3451 k = VEC_length (ddr_p, ddrs)
3452 - VEC_length (data_reference_p, datarefs) + i;
3453 ddr = VEC_index (ddr_p, ddrs, k);
3454 gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3455 newddr = initialize_data_dependence_relation (dr, dr, nest);
3456 VEC_replace (ddr_p, ddrs, k, newddr);
3457 free_dependence_relation (ddr);
3458 VEC_replace (data_reference_p, datarefs, i, dr);
3460 if (bad)
3462 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3465 "not vectorized: data dependence conflict"
3466 " prevents gather load");
3467 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3469 return false;
3472 STMT_VINFO_GATHER_P (stmt_info) = true;
3474 else if (loop_vinfo
3475 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3477 bool strided_load = false;
3478 if (!nested_in_vect_loop_p (loop, stmt))
3479 strided_load
3480 = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
3481 if (!strided_load)
3483 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
3485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3486 "not vectorized: not suitable for strided "
3487 "load ");
3488 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3490 return false;
3492 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
3496 return true;
3500 /* Function vect_get_new_vect_var.
3502 Returns a name for a new variable. The current naming scheme prepends the
3503 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to NAME,
3504 when NAME is provided, and uses that as the name of the new
3505 vectorizer-generated variable. */
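/* For example, based on the code below:
   vect_get_new_vect_var (ptr_type, vect_pointer_var, "foo") creates a
   temporary whose name starts with "vect_pfoo", while passing NULL for
   NAME just uses the bare prefix.  */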
3507 tree
3508 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3510 const char *prefix;
3511 tree new_vect_var;
3513 switch (var_kind)
3515 case vect_simple_var:
3516 prefix = "vect_";
3517 break;
3518 case vect_scalar_var:
3519 prefix = "stmp_";
3520 break;
3521 case vect_pointer_var:
3522 prefix = "vect_p";
3523 break;
3524 default:
3525 gcc_unreachable ();
3528 if (name)
3530 char* tmp = concat (prefix, name, NULL);
3531 new_vect_var = create_tmp_reg (type, tmp);
3532 free (tmp);
3534 else
3535 new_vect_var = create_tmp_reg (type, prefix);
3537 return new_vect_var;
3541 /* Function vect_create_addr_base_for_vector_ref.
3543 Create an expression that computes the address of the first memory location
3544 that will be accessed for a data reference.
3546 Input:
3547 STMT: The statement containing the data reference.
3548 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3549 OFFSET: Optional. If supplied, it is added to the initial address.
3550 LOOP: Specify relative to which loop-nest the address should be computed.
3551 For example, when the dataref is in an inner-loop nested in an
3552 outer-loop that is now being vectorized, LOOP can be either the
3553 outer-loop, or the inner-loop. The first memory location accessed
3554 by the following dataref ('in' points to short):
3556 for (i=0; i<N; i++)
3557 for (j=0; j<M; j++)
3558 s += in[i+j]
3560 is as follows:
3561 if LOOP=i_loop: &in (relative to i_loop)
3562 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3564 Output:
3565 1. Return an SSA_NAME whose value is the address of the memory location of
3566 the first vector of the data reference.
3567 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3568 these statement(s) which define the returned SSA_NAME.
3570 FORNOW: We are only handling array accesses with step 1. */
3572 tree
3573 vect_create_addr_base_for_vector_ref (gimple stmt,
3574 gimple_seq *new_stmt_list,
3575 tree offset,
3576 struct loop *loop)
3578 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3579 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3580 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3581 tree base_name;
3582 tree data_ref_base_var;
3583 tree vec_stmt;
3584 tree addr_base, addr_expr;
3585 tree dest;
3586 gimple_seq seq = NULL;
3587 tree base_offset = unshare_expr (DR_OFFSET (dr));
3588 tree init = unshare_expr (DR_INIT (dr));
3589 tree vect_ptr_type;
3590 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3591 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3592 tree base;
3594 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3596 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3598 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3600 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3601 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3602 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3605 if (loop_vinfo)
3606 base_name = build_fold_indirect_ref (data_ref_base);
3607 else
3609 base_offset = ssize_int (0);
3610 init = ssize_int (0);
3611 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
3614 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3615 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3616 data_ref_base_var);
3617 gimple_seq_add_seq (new_stmt_list, seq);
3619 /* Create base_offset */
3620 base_offset = size_binop (PLUS_EXPR,
3621 fold_convert (sizetype, base_offset),
3622 fold_convert (sizetype, init));
3623 dest = create_tmp_var (sizetype, "base_off");
3624 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3625 gimple_seq_add_seq (new_stmt_list, seq);
3627 if (offset)
3629 tree tmp = create_tmp_var (sizetype, "offset");
3631 offset = fold_build2 (MULT_EXPR, sizetype,
3632 fold_convert (sizetype, offset), step);
3633 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3634 base_offset, offset);
3635 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3636 gimple_seq_add_seq (new_stmt_list, seq);
3639 /* base + base_offset */
3640 if (loop_vinfo)
3641 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3642 else
3644 addr_base = build1 (ADDR_EXPR,
3645 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3646 unshare_expr (DR_REF (dr)));
3649 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3650 base = get_base_address (DR_REF (dr));
3651 if (base
3652 && TREE_CODE (base) == MEM_REF)
3653 vect_ptr_type
3654 = build_qualified_type (vect_ptr_type,
3655 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3657 vec_stmt = fold_convert (vect_ptr_type, addr_base);
3658 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3659 get_name (base_name));
3660 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3661 gimple_seq_add_seq (new_stmt_list, seq);
3663 if (DR_PTR_INFO (dr)
3664 && TREE_CODE (vec_stmt) == SSA_NAME)
3666 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3667 if (offset)
3668 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (vec_stmt));
3671 if (dump_kind_p (MSG_NOTE))
3673 dump_printf_loc (MSG_NOTE, vect_location, "created ");
3674 dump_generic_expr (MSG_NOTE, TDF_SLIM, vec_stmt);
3677 return vec_stmt;
3681 /* Function vect_create_data_ref_ptr.
3683 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3684 location accessed in the loop by STMT, along with the def-use update
3685 chain to appropriately advance the pointer through the loop iterations.
3686 Also set aliasing information for the pointer. This pointer is used by
3687 the callers to this function to create a memory reference expression for
3688 vector load/store access.
3690 Input:
3691 1. STMT: a stmt that references memory. Expected to be of the form
3692 GIMPLE_ASSIGN <name, data-ref> or
3693 GIMPLE_ASSIGN <data-ref, name>.
3694 2. AGGR_TYPE: the type of the reference, which should be either a vector
3695 or an array.
3696 3. AT_LOOP: the loop where the vector memref is to be created.
3697 4. OFFSET (optional): an offset to be added to the initial address accessed
3698 by the data-ref in STMT.
3699 5. BSI: location where the new stmts are to be placed if there is no loop
3700 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3701 pointing to the initial address.
3703 Output:
3704 1. Declare a new ptr to vector_type, and have it point to the base of the
3705 data reference (the initial address accessed by the data reference).
3706 For example, for vector of type V8HI, the following code is generated:
3708 v8hi *ap;
3709 ap = (v8hi *)initial_address;
3711 if OFFSET is not supplied:
3712 initial_address = &a[init];
3713 if OFFSET is supplied:
3714 initial_address = &a[init + OFFSET];
3716 Return the initial_address in INITIAL_ADDRESS.
3718 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3719 update the pointer in each iteration of the loop.
3721 Return the increment stmt that updates the pointer in PTR_INCR.
3723 3. Set INV_P to true if the access pattern of the data reference in the
3724 vectorized loop is invariant. Set it to false otherwise.
3726 4. Return the pointer. */
3728 tree
3729 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3730 tree offset, tree *initial_address,
3731 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3732 bool only_init, bool *inv_p)
3734 tree base_name;
3735 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3736 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3737 struct loop *loop = NULL;
3738 bool nested_in_vect_loop = false;
3739 struct loop *containing_loop = NULL;
3740 tree aggr_ptr_type;
3741 tree aggr_ptr;
3742 tree new_temp;
3743 gimple vec_stmt;
3744 gimple_seq new_stmt_list = NULL;
3745 edge pe = NULL;
3746 basic_block new_bb;
3747 tree aggr_ptr_init;
3748 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3749 tree aptr;
3750 gimple_stmt_iterator incr_gsi;
3751 bool insert_after;
3752 bool negative;
3753 tree indx_before_incr, indx_after_incr;
3754 gimple incr;
3755 tree step;
3756 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3757 tree base;
3759 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3760 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3762 if (loop_vinfo)
3764 loop = LOOP_VINFO_LOOP (loop_vinfo);
3765 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3766 containing_loop = (gimple_bb (stmt))->loop_father;
3767 pe = loop_preheader_edge (loop);
3769 else
3771 gcc_assert (bb_vinfo);
3772 only_init = true;
3773 *ptr_incr = NULL;
3776 /* Check the step (evolution) of the load in LOOP, and record
3777 whether it's invariant. */
3778 if (nested_in_vect_loop)
3779 step = STMT_VINFO_DR_STEP (stmt_info);
3780 else
3781 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3783 if (tree_int_cst_compare (step, size_zero_node) == 0)
3784 *inv_p = true;
3785 else
3786 *inv_p = false;
3787 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3789 /* Create an expression for the first address accessed by this load
3790 in LOOP. */
3791 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3793 if (dump_kind_p (MSG_NOTE))
3795 tree data_ref_base = base_name;
3796 dump_printf_loc (MSG_NOTE, vect_location,
3797 "create %s-pointer variable to type: ",
3798 tree_code_name[(int) TREE_CODE (aggr_type)]);
3799 dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
3800 if (TREE_CODE (data_ref_base) == VAR_DECL
3801 || TREE_CODE (data_ref_base) == ARRAY_REF)
3802 dump_printf (MSG_NOTE, " vectorizing an array ref: ");
3803 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3804 dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
3805 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3806 dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
3807 dump_generic_expr (MSG_NOTE, TDF_SLIM, base_name);
3810 /* (1) Create the new aggregate-pointer variable. */
3811 aggr_ptr_type = build_pointer_type (aggr_type);
3812 base = get_base_address (DR_REF (dr));
3813 if (base
3814 && TREE_CODE (base) == MEM_REF)
3815 aggr_ptr_type
3816 = build_qualified_type (aggr_ptr_type,
3817 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3818 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3819 get_name (base_name));
3821 /* Vector and array types inherit the alias set of their component
3822 type by default so we need to use a ref-all pointer if the data
3823 reference does not conflict with the created aggregated data
3824 reference because it is not addressable. */
3825 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3826 get_alias_set (DR_REF (dr))))
3828 aggr_ptr_type
3829 = build_pointer_type_for_mode (aggr_type,
3830 TYPE_MODE (aggr_ptr_type), true);
3831 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3832 get_name (base_name));
3835 /* Likewise for any of the data references in the stmt group. */
3836 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3838 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3841 tree lhs = gimple_assign_lhs (orig_stmt);
3842 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3843 get_alias_set (lhs)))
3845 aggr_ptr_type
3846 = build_pointer_type_for_mode (aggr_type,
3847 TYPE_MODE (aggr_ptr_type), true);
3848 aggr_ptr
3849 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3850 get_name (base_name));
3851 break;
3854 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3856 while (orig_stmt);
3859 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3860 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3861 def-use update cycles for the pointer: one relative to the outer-loop
3862 (LOOP), which is what steps (3) and (4) below do. The other is relative
3863 to the inner-loop (which is the inner-most loop containing the dataref),
3864 and this is done by step (5) below.
3866 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3867 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3868 redundant. Steps (3),(4) create the following:
3870 vp0 = &base_addr;
3871 LOOP: vp1 = phi(vp0,vp2)
3874 vp2 = vp1 + step
3875 goto LOOP
3877 If there is an inner-loop nested in loop, then step (5) will also be
3878 applied, and an additional update in the inner-loop will be created:
3880 vp0 = &base_addr;
3881 LOOP: vp1 = phi(vp0,vp2)
3883 inner: vp3 = phi(vp1,vp4)
3884 vp4 = vp3 + inner_step
3885 if () goto inner
3887 vp2 = vp1 + step
3888 if () goto LOOP */
3890 /* (2) Calculate the initial address of the aggregate-pointer, and set
3891 the aggregate-pointer to point to it before the loop. */
3893 /* Create &(base[init_val+offset]) in the loop preheader. */
3895 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3896 offset, loop);
3897 if (new_stmt_list)
3899 if (pe)
3901 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3902 gcc_assert (!new_bb);
3904 else
3905 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3908 *initial_address = new_temp;
3910 /* Create: p = (aggr_type *) initial_base */
3911 if (TREE_CODE (new_temp) != SSA_NAME
3912 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3914 vec_stmt = gimple_build_assign (aggr_ptr,
3915 fold_convert (aggr_ptr_type, new_temp));
3916 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3917 /* Copy the points-to information if it exists. */
3918 if (DR_PTR_INFO (dr))
3919 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3920 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3921 if (pe)
3923 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3924 gcc_assert (!new_bb);
3926 else
3927 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3929 else
3930 aggr_ptr_init = new_temp;
3932 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3933 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3934 inner-loop nested in LOOP (during outer-loop vectorization). */
3936 /* No update in loop is required. */
3937 if (only_init && (!loop_vinfo || at_loop == loop))
3938 aptr = aggr_ptr_init;
3939 else
3941 /* The step of the aggregate pointer is the type size. */
3942 tree step = TYPE_SIZE_UNIT (aggr_type);
3943 /* One exception to the above is when the scalar step of the load in
3944 LOOP is zero. In this case the step here is also zero. */
3945 if (*inv_p)
3946 step = size_zero_node;
3947 else if (negative)
3948 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3950 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3952 create_iv (aggr_ptr_init,
3953 fold_convert (aggr_ptr_type, step),
3954 aggr_ptr, loop, &incr_gsi, insert_after,
3955 &indx_before_incr, &indx_after_incr);
3956 incr = gsi_stmt (incr_gsi);
3957 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3959 /* Copy the points-to information if it exists. */
3960 if (DR_PTR_INFO (dr))
3962 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3963 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3965 if (ptr_incr)
3966 *ptr_incr = incr;
3968 aptr = indx_before_incr;
3971 if (!nested_in_vect_loop || only_init)
3972 return aptr;
3975 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3976 nested in LOOP, if exists. */
3978 gcc_assert (nested_in_vect_loop);
3979 if (!only_init)
3981 standard_iv_increment_position (containing_loop, &incr_gsi,
3982 &insert_after);
3983 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3984 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3985 &indx_after_incr);
3986 incr = gsi_stmt (incr_gsi);
3987 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3989 /* Copy the points-to information if it exists. */
3990 if (DR_PTR_INFO (dr))
3992 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3993 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3995 if (ptr_incr)
3996 *ptr_incr = incr;
3998 return indx_before_incr;
4000 else
4001 gcc_unreachable ();
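/* A typical call to the function above (argument choices are illustrative
   and depend on the caller):

     dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, loop, NULL_TREE,
                                             &init_addr, gsi, &ptr_incr,
                                             false, &inv_p);

   The caller then builds the vector MEM_REF from DATAREF_PTR and, for
   multiple vector copies, advances it with bump_vector_ptr below.  */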
4005 /* Function bump_vector_ptr
4007 Increment a pointer (to a vector type) by vector-size. If requested,
4008 i.e. if PTR_INCR is given, then also connect the new increment stmt
4009 to the existing def-use update-chain of the pointer, by modifying
4010 the PTR_INCR as illustrated below:
4012 The pointer def-use update-chain before this function:
4013 DATAREF_PTR = phi (p_0, p_2)
4014 ....
4015 PTR_INCR: p_2 = DATAREF_PTR + step
4017 The pointer def-use update-chain after this function:
4018 DATAREF_PTR = phi (p_0, p_2)
4019 ....
4020 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4021 ....
4022 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
4024 Input:
4025 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
4026 in the loop.
4027 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
4028 the loop. The increment amount across iterations is expected
4029 to be vector_size.
4030 BSI - location where the new update stmt is to be placed.
4031 STMT - the original scalar memory-access stmt that is being vectorized.
4032 BUMP - optional. The offset by which to bump the pointer. If not given,
4033 the offset is assumed to be vector_size.
4035 Output: Return NEW_DATAREF_PTR as illustrated above. */
4039 tree
4040 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
4041 gimple stmt, tree bump)
4043 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4044 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4045 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4046 tree update = TYPE_SIZE_UNIT (vectype);
4047 gimple incr_stmt;
4048 ssa_op_iter iter;
4049 use_operand_p use_p;
4050 tree new_dataref_ptr;
4052 if (bump)
4053 update = bump;
4055 new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
4056 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
4057 dataref_ptr, update);
4058 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
4060 /* Copy the points-to information if it exists. */
4061 if (DR_PTR_INFO (dr))
4063 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
4064 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
4067 if (!ptr_incr)
4068 return new_dataref_ptr;
4070 /* Update the vector-pointer's cross-iteration increment. */
4071 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
4073 tree use = USE_FROM_PTR (use_p);
4075 if (use == dataref_ptr)
4076 SET_USE (use_p, new_dataref_ptr);
4077 else
4078 gcc_assert (tree_int_cst_compare (use, update) == 0);
4081 return new_dataref_ptr;
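/* Illustrative use between two vector copies of a grouped access (names
   are those of the caller's locals, not fixed by this function):

     dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                    NULL_TREE);

   With BUMP == NULL_TREE the pointer advances by TYPE_SIZE_UNIT of the
   vector type, as described above.  */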
4085 /* Function vect_create_destination_var.
4087 Create a new temporary of type VECTYPE. */
4089 tree
4090 vect_create_destination_var (tree scalar_dest, tree vectype)
4092 tree vec_dest;
4093 const char *new_name;
4094 tree type;
4095 enum vect_var_kind kind;
4097 kind = vectype ? vect_simple_var : vect_scalar_var;
4098 type = vectype ? vectype : TREE_TYPE (scalar_dest);
4100 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4102 new_name = get_name (scalar_dest);
4103 if (!new_name)
4104 new_name = "var_";
4105 vec_dest = vect_get_new_vect_var (type, kind, new_name);
4107 return vec_dest;
4110 /* Function vect_grouped_store_supported.
4112 Returns TRUE if interleave high and interleave low permutations
4113 are supported, and FALSE otherwise. */
4115 bool
4116 vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
4118 enum machine_mode mode = TYPE_MODE (vectype);
4120 /* vect_permute_store_chain requires the group size to be a power of two. */
4121 if (exact_log2 (count) == -1)
4123 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
4124 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4125 "the size of the group of accesses"
4126 " is not a power of 2");
4127 return false;
4130 /* Check that the permutation is supported. */
4131 if (VECTOR_MODE_P (mode))
4133 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4134 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4135 for (i = 0; i < nelt / 2; i++)
4137 sel[i * 2] = i;
4138 sel[i * 2 + 1] = i + nelt;
4140 if (can_vec_perm_p (mode, false, sel))
4142 for (i = 0; i < nelt; i++)
4143 sel[i] += nelt / 2;
4144 if (can_vec_perm_p (mode, false, sel))
4145 return true;
4149 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
4150 dump_printf (MSG_MISSED_OPTIMIZATION,
4151 "interleave op not supported by target.");
4152 return false;
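/* For example, for V8HI (nelt == 8) the two masks checked above are

     interleave high:  { 0, 8, 1, 9, 2, 10, 3, 11 }
     interleave low:   { 4, 12, 5, 13, 6, 14, 7, 15 }

   which are the same masks vect_permute_store_chain generates below.  */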
4156 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
4157 type VECTYPE. */
4159 bool
4160 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4162 return vect_lanes_optab_supported_p ("vec_store_lanes",
4163 vec_store_lanes_optab,
4164 vectype, count);
4168 /* Function vect_permute_store_chain.
4170 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4171 a power of 2, generate interleave_high/low stmts to reorder the data
4172 correctly for the stores. Return the final references for stores in
4173 RESULT_CHAIN.
4175 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4176 The input is 4 vectors each containing 8 elements. We assign a number to
4177 each element, the input sequence is:
4179 1st vec: 0 1 2 3 4 5 6 7
4180 2nd vec: 8 9 10 11 12 13 14 15
4181 3rd vec: 16 17 18 19 20 21 22 23
4182 4th vec: 24 25 26 27 28 29 30 31
4184 The output sequence should be:
4186 1st vec: 0 8 16 24 1 9 17 25
4187 2nd vec: 2 10 18 26 3 11 19 27
4188 3rd vec: 4 12 20 28 5 13 21 29
4189 4th vec: 6 14 22 30 7 15 23 31
4191 i.e., we interleave the contents of the four vectors in their order.
4193 We use interleave_high/low instructions to create such output. The input of
4194 each interleave_high/low operation is two vectors:
4195 1st vec 2nd vec
4196 0 1 2 3 4 5 6 7
4197 the even elements of the result vector are obtained left-to-right from the
4198 high/low elements of the first vector. The odd elements of the result are
4199 obtained left-to-right from the high/low elements of the second vector.
4200 The output of interleave_high will be: 0 4 1 5
4201 and of interleave_low: 2 6 3 7
4204 The permutation is done in log LENGTH stages. In each stage interleave_high
4205 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4206 where the first argument is taken from the first half of DR_CHAIN and the
4207 second argument from its second half.
4208 In our example,
4210 I1: interleave_high (1st vec, 3rd vec)
4211 I2: interleave_low (1st vec, 3rd vec)
4212 I3: interleave_high (2nd vec, 4th vec)
4213 I4: interleave_low (2nd vec, 4th vec)
4215 The output for the first stage is:
4217 I1: 0 16 1 17 2 18 3 19
4218 I2: 4 20 5 21 6 22 7 23
4219 I3: 8 24 9 25 10 26 11 27
4220 I4: 12 28 13 29 14 30 15 31
4222 The output of the second stage, i.e. the final result is:
4224 I1: 0 8 16 24 1 9 17 25
4225 I2: 2 10 18 26 3 11 19 27
4226 I3: 4 12 20 28 5 13 21 29
4227 I4: 6 14 22 30 7 15 23 31. */
4229 void
4230 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
4231 unsigned int length,
4232 gimple stmt,
4233 gimple_stmt_iterator *gsi,
4234 VEC(tree,heap) **result_chain)
4236 tree vect1, vect2, high, low;
4237 gimple perm_stmt;
4238 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4239 tree perm_mask_low, perm_mask_high;
4240 unsigned int i, n;
4241 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
4242 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4244 *result_chain = VEC_copy (tree, heap, dr_chain);
4246 for (i = 0, n = nelt / 2; i < n; i++)
4248 sel[i * 2] = i;
4249 sel[i * 2 + 1] = i + nelt;
4251 perm_mask_high = vect_gen_perm_mask (vectype, sel);
4252 gcc_assert (perm_mask_high != NULL);
4254 for (i = 0; i < nelt; i++)
4255 sel[i] += nelt / 2;
4256 perm_mask_low = vect_gen_perm_mask (vectype, sel);
4257 gcc_assert (perm_mask_low != NULL);
4259 for (i = 0, n = exact_log2 (length); i < n; i++)
4261 for (j = 0; j < length/2; j++)
4263 vect1 = VEC_index (tree, dr_chain, j);
4264 vect2 = VEC_index (tree, dr_chain, j+length/2);
4266 /* Create interleaving stmt:
4267 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
4268 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
4269 perm_stmt
4270 = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
4271 vect1, vect2, perm_mask_high);
4272 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4273 VEC_replace (tree, *result_chain, 2*j, high);
4275 /* Create interleaving stmt:
4276 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4277 nelt*3/2+1, ...}> */
4278 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
4279 perm_stmt
4280 = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
4281 vect1, vect2, perm_mask_low);
4282 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4283 VEC_replace (tree, *result_chain, 2*j+1, low);
4285 dr_chain = VEC_copy (tree, heap, *result_chain);
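/* Schematically, for V8HI each pair (vect1, vect2) processed above expands
   to (SSA version numbers are illustrative):

     vect_inter_high_7 = VEC_PERM_EXPR <vect1, vect2,
                                        { 0, 8, 1, 9, 2, 10, 3, 11 }>;
     vect_inter_low_8  = VEC_PERM_EXPR <vect1, vect2,
                                        { 4, 12, 5, 13, 6, 14, 7, 15 }>;  */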
4289 /* Function vect_setup_realignment
4291 This function is called when vectorizing an unaligned load using
4292 the dr_explicit_realign[_optimized] scheme.
4293 This function generates the following code at the loop prolog:
4295 p = initial_addr;
4296 x msq_init = *(floor(p)); # prolog load
4297 realignment_token = call target_builtin;
4298 loop:
4299 x msq = phi (msq_init, ---)
4301 The stmts marked with x are generated only for the case of
4302 dr_explicit_realign_optimized.
4304 The code above sets up a new (vector) pointer, pointing to the first
4305 location accessed by STMT, and a "floor-aligned" load using that pointer.
4306 It also generates code to compute the "realignment-token" (if the relevant
4307 target hook was defined), and creates a phi-node at the loop-header bb
4308 whose arguments are the result of the prolog-load (created by this
4309 function) and the result of a load that takes place in the loop (to be
4310 created by the caller to this function).
4312 For the case of dr_explicit_realign_optimized:
4313 The caller to this function uses the phi-result (msq) to create the
4314 realignment code inside the loop, and sets up the missing phi argument,
4315 as follows:
4316 loop:
4317 msq = phi (msq_init, lsq)
4318 lsq = *(floor(p')); # load in loop
4319 result = realign_load (msq, lsq, realignment_token);
4321 For the case of dr_explicit_realign:
4322 loop:
4323 msq = *(floor(p)); # load in loop
4324 p' = p + (VS-1);
4325 lsq = *(floor(p')); # load in loop
4326 result = realign_load (msq, lsq, realignment_token);
4328 Input:
4329 STMT - (scalar) load stmt to be vectorized. This load accesses
4330 a memory location that may be unaligned.
4331 BSI - place where new code is to be inserted.
4332 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4333 is used.
4335 Output:
4336 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4337 target hook, if defined.
4338 Return value - the result of the loop-header phi node. */
4340 tree
4341 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4342 tree *realignment_token,
4343 enum dr_alignment_support alignment_support_scheme,
4344 tree init_addr,
4345 struct loop **at_loop)
4347 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4348 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4349 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4350 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4351 struct loop *loop = NULL;
4352 edge pe = NULL;
4353 tree scalar_dest = gimple_assign_lhs (stmt);
4354 tree vec_dest;
4355 gimple inc;
4356 tree ptr;
4357 tree data_ref;
4358 gimple new_stmt;
4359 basic_block new_bb;
4360 tree msq_init = NULL_TREE;
4361 tree new_temp;
4362 gimple phi_stmt;
4363 tree msq = NULL_TREE;
4364 gimple_seq stmts = NULL;
4365 bool inv_p;
4366 bool compute_in_loop = false;
4367 bool nested_in_vect_loop = false;
4368 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4369 struct loop *loop_for_initial_load = NULL;
4371 if (loop_vinfo)
4373 loop = LOOP_VINFO_LOOP (loop_vinfo);
4374 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4377 gcc_assert (alignment_support_scheme == dr_explicit_realign
4378 || alignment_support_scheme == dr_explicit_realign_optimized);
4380 /* We need to generate three things:
4381 1. the misalignment computation
4382 2. the extra vector load (for the optimized realignment scheme).
4383 3. the phi node for the two vectors from which the realignment is
4384 done (for the optimized realignment scheme). */
4386 /* 1. Determine where to generate the misalignment computation.
4388 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4389 calculation will be generated by this function, outside the loop (in the
4390 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4391 caller, inside the loop.
4393 Background: If the misalignment remains fixed throughout the iterations of
4394 the loop, then both realignment schemes are applicable, and also the
4395 misalignment computation can be done outside LOOP. This is because we are
4396 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4397 are a multiple of VS (the Vector Size), and therefore the misalignment in
4398 different vectorized LOOP iterations is always the same.
4399 The problem arises only if the memory access is in an inner-loop nested
4400 inside LOOP, which is now being vectorized using outer-loop vectorization.
4401 This is the only case when the misalignment of the memory access may not
4402 remain fixed throughout the iterations of the inner-loop (as explained in
4403 detail in vect_supportable_dr_alignment). In this case, not only is the
4404 optimized realignment scheme not applicable, but also the misalignment
4405 computation (and generation of the realignment token that is passed to
4406 REALIGN_LOAD) have to be done inside the loop.
4408 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4409 or not, which in turn determines if the misalignment is computed inside
4410 the inner-loop, or outside LOOP. */
4412 if (init_addr != NULL_TREE || !loop_vinfo)
4414 compute_in_loop = true;
4415 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4419 /* 2. Determine where to generate the extra vector load.
4421 For the optimized realignment scheme, instead of generating two vector
4422 loads in each iteration, we generate a single extra vector load in the
4423 preheader of the loop, and in each iteration reuse the result of the
4424 vector load from the previous iteration. In case the memory access is in
4425 an inner-loop nested inside LOOP, which is now being vectorized using
4426 outer-loop vectorization, we need to determine whether this initial vector
4427 load should be generated at the preheader of the inner-loop, or can be
4428 generated at the preheader of LOOP. If the memory access has no evolution
4429 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4430 to be generated inside LOOP (in the preheader of the inner-loop). */
4432 if (nested_in_vect_loop)
4434 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4435 bool invariant_in_outerloop =
4436 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4437 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4439 else
4440 loop_for_initial_load = loop;
4441 if (at_loop)
4442 *at_loop = loop_for_initial_load;
4444 if (loop_for_initial_load)
4445 pe = loop_preheader_edge (loop_for_initial_load);
4447 /* 3. For the case of the optimized realignment, create the first vector
4448 load at the loop preheader. */
4450 if (alignment_support_scheme == dr_explicit_realign_optimized)
4452 /* Create msq_init = *(floor(p1)) in the loop preheader */
4454 gcc_assert (!compute_in_loop);
4455 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4456 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4457 NULL_TREE, &init_addr, NULL, &inc,
4458 true, &inv_p);
4459 new_temp = copy_ssa_name (ptr, NULL);
4460 new_stmt = gimple_build_assign_with_ops
4461 (BIT_AND_EXPR, new_temp, ptr,
4462 build_int_cst (TREE_TYPE (ptr),
4463 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
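         /* E.g. for a 16-byte vector type TYPE_ALIGN_UNIT is 16, so the
            statement built above computes new_temp = ptr & -16, i.e. the
            floor (p) of the scheme described in the function comment.  */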
4464 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4465 gcc_assert (!new_bb);
4466 data_ref
4467 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4468 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4469 new_stmt = gimple_build_assign (vec_dest, data_ref);
4470 new_temp = make_ssa_name (vec_dest, new_stmt);
4471 gimple_assign_set_lhs (new_stmt, new_temp);
4472 if (pe)
4474 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4475 gcc_assert (!new_bb);
4477 else
4478 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4480 msq_init = gimple_assign_lhs (new_stmt);
4483 /* 4. Create realignment token using a target builtin, if available.
4484 It is done either inside the containing loop, or before LOOP (as
4485 determined above). */
4487 if (targetm.vectorize.builtin_mask_for_load)
4489 tree builtin_decl;
4491 /* Compute INIT_ADDR - the initial address accessed by this memref. */
4492 if (!init_addr)
4494 /* Generate the INIT_ADDR computation outside LOOP. */
4495 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4496 NULL_TREE, loop);
4497 if (loop)
4499 pe = loop_preheader_edge (loop);
4500 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4501 gcc_assert (!new_bb);
4503 else
4504 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4507 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4508 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4509 vec_dest =
4510 vect_create_destination_var (scalar_dest,
4511 gimple_call_return_type (new_stmt));
4512 new_temp = make_ssa_name (vec_dest, new_stmt);
4513 gimple_call_set_lhs (new_stmt, new_temp);
4515 if (compute_in_loop)
4516 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4517 else
4519 /* Generate the misalignment computation outside LOOP. */
4520 pe = loop_preheader_edge (loop);
4521 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4522 gcc_assert (!new_bb);
4525 *realignment_token = gimple_call_lhs (new_stmt);
4527 /* The result of the CALL_EXPR to this builtin is determined from
4528 the value of the parameter and no global variables are touched
4529 which makes the builtin a "const" function. Requiring the
4530 builtin to have the "const" attribute makes it unnecessary
4531 to call mark_call_clobbered. */
4532 gcc_assert (TREE_READONLY (builtin_decl));
4535 if (alignment_support_scheme == dr_explicit_realign)
4536 return msq;
4538 gcc_assert (!compute_in_loop);
4539 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4542 /* 5. Create msq = phi <msq_init, lsq> in loop */
4544 pe = loop_preheader_edge (containing_loop);
4545 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4546 msq = make_ssa_name (vec_dest, NULL);
4547 phi_stmt = create_phi_node (msq, containing_loop->header);
4548 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4550 return msq;
4554 /* Function vect_grouped_load_supported.
4556 Returns TRUE if even and odd permutations are supported,
4557 and FALSE otherwise. */
4559 bool
4560 vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4562 enum machine_mode mode = TYPE_MODE (vectype);
4564 /* vect_permute_load_chain requires the group size to be a power of two. */
4565 if (exact_log2 (count) == -1)
4567 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
4568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4569 "the size of the group of accesses"
4570 " is not a power of 2");
4571 return false;
4574 /* Check that the permutation is supported. */
4575 if (VECTOR_MODE_P (mode))
4577 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4578 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4580 for (i = 0; i < nelt; i++)
4581 sel[i] = i * 2;
4582 if (can_vec_perm_p (mode, false, sel))
4584 for (i = 0; i < nelt; i++)
4585 sel[i] = i * 2 + 1;
4586 if (can_vec_perm_p (mode, false, sel))
4587 return true;
4591 if (dump_kind_p (MSG_MISSED_OPTIMIZATION))
4592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4593 "extract even/odd not supported by target");
4594 return false;
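/* For example, for V8HI (nelt == 8) the two masks checked above are

     extract even:  { 0, 2, 4, 6, 8, 10, 12, 14 }
     extract odd:   { 1, 3, 5, 7, 9, 11, 13, 15 }

   matching the masks used by vect_permute_load_chain below.  */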
4597 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4598 type VECTYPE. */
4600 bool
4601 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4603 return vect_lanes_optab_supported_p ("vec_load_lanes",
4604 vec_load_lanes_optab,
4605 vectype, count);
4608 /* Function vect_permute_load_chain.
4610 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4611 a power of 2, generate extract_even/odd stmts to reorder the input data
4612 correctly. Return the final references for loads in RESULT_CHAIN.
4614 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4615 The input is 4 vectors each containing 8 elements. We assign a number to each
4616 element, the input sequence is:
4618 1st vec: 0 1 2 3 4 5 6 7
4619 2nd vec: 8 9 10 11 12 13 14 15
4620 3rd vec: 16 17 18 19 20 21 22 23
4621 4th vec: 24 25 26 27 28 29 30 31
4623 The output sequence should be:
4625 1st vec: 0 4 8 12 16 20 24 28
4626 2nd vec: 1 5 9 13 17 21 25 29
4627 3rd vec: 2 6 10 14 18 22 26 30
4628 4th vec: 3 7 11 15 19 23 27 31
4630 i.e., the first output vector should contain the first elements of each
4631 interleaving group, etc.
4633 We use extract_even/odd instructions to create such output. The input of
4634 each extract_even/odd operation is two vectors
4635 1st vec 2nd vec
4636 0 1 2 3 4 5 6 7
4638 and the output is the vector of extracted even/odd elements. The output of
4639 extract_even will be: 0 2 4 6
4640 and of extract_odd: 1 3 5 7
4643 The permutation is done in log LENGTH stages. In each stage extract_even
4644 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4645 their order. In our example,
4647 E1: extract_even (1st vec, 2nd vec)
4648 E2: extract_odd (1st vec, 2nd vec)
4649 E3: extract_even (3rd vec, 4th vec)
4650 E4: extract_odd (3rd vec, 4th vec)
4652 The output for the first stage will be:
4654 E1: 0 2 4 6 8 10 12 14
4655 E2: 1 3 5 7 9 11 13 15
4656 E3: 16 18 20 22 24 26 28 30
4657 E4: 17 19 21 23 25 27 29 31
4659 In order to proceed and create the correct sequence for the next stage (or
4660 for the correct output, if the second stage is the last one, as in our
4661 example), we first put the output of extract_even operation and then the
4662 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4663 The input for the second stage is:
4665 1st vec (E1): 0 2 4 6 8 10 12 14
4666 2nd vec (E3): 16 18 20 22 24 26 28 30
4667 3rd vec (E2): 1 3 5 7 9 11 13 15
4668 4th vec (E4): 17 19 21 23 25 27 29 31
4670 The output of the second stage:
4672 E1: 0 4 8 12 16 20 24 28
4673 E2: 2 6 10 14 18 22 26 30
4674 E3: 1 5 9 13 17 21 25 29
4675 E4: 3 7 11 15 19 23 27 31
4677 And RESULT_CHAIN after reordering:
4679 1st vec (E1): 0 4 8 12 16 20 24 28
4680 2nd vec (E3): 1 5 9 13 17 21 25 29
4681 3rd vec (E2): 2 6 10 14 18 22 26 30
4682 4th vec (E4): 3 7 11 15 19 23 27 31. */
4684 static void
4685 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4686 unsigned int length,
4687 gimple stmt,
4688 gimple_stmt_iterator *gsi,
4689 VEC(tree,heap) **result_chain)
4691 tree data_ref, first_vect, second_vect;
4692 tree perm_mask_even, perm_mask_odd;
4693 gimple perm_stmt;
4694 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4695 unsigned int i, j, log_length = exact_log2 (length);
4696 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4697 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4699 *result_chain = VEC_copy (tree, heap, dr_chain);
4701 for (i = 0; i < nelt; ++i)
4702 sel[i] = i * 2;
4703 perm_mask_even = vect_gen_perm_mask (vectype, sel);
4704 gcc_assert (perm_mask_even != NULL);
4706 for (i = 0; i < nelt; ++i)
4707 sel[i] = i * 2 + 1;
4708 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4709 gcc_assert (perm_mask_odd != NULL);
4711 for (i = 0; i < log_length; i++)
4713 for (j = 0; j < length; j += 2)
4715 first_vect = VEC_index (tree, dr_chain, j);
4716 second_vect = VEC_index (tree, dr_chain, j+1);
4718 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4719 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
4720 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4721 first_vect, second_vect,
4722 perm_mask_even);
4723 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4724 VEC_replace (tree, *result_chain, j/2, data_ref);
4726 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4727 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
4728 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4729 first_vect, second_vect,
4730 perm_mask_odd);
4731 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4732 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4734 dr_chain = VEC_copy (tree, heap, *result_chain);
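/* Schematically, for V8HI each pair (first_vect, second_vect) processed
   above expands to (SSA version numbers are illustrative):

     vect_perm_even_5 = VEC_PERM_EXPR <first_vect, second_vect,
                                       { 0, 2, 4, 6, 8, 10, 12, 14 }>;
     vect_perm_odd_6  = VEC_PERM_EXPR <first_vect, second_vect,
                                       { 1, 3, 5, 7, 9, 11, 13, 15 }>;  */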
4739 /* Function vect_transform_grouped_load.
4741 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4742 to perform their permutation and ascribe the result vectorized statements to
4743 the scalar statements. */
4746 void
4747 vect_transform_grouped_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4748 gimple_stmt_iterator *gsi)
4750 VEC(tree,heap) *result_chain = NULL;
4752 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4753 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4754 vectors, that are ready for vector computation. */
4755 result_chain = VEC_alloc (tree, heap, size);
4756 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4757 vect_record_grouped_load_vectors (stmt, result_chain);
4758 VEC_free (tree, heap, result_chain);
4761 /* RESULT_CHAIN contains the output of a group of grouped loads that were
4762 generated as part of the vectorization of STMT. Assign the statement
4763 for each vector to the associated scalar statement. */
4765 void
4766 vect_record_grouped_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4768 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4769 gimple next_stmt, new_stmt;
4770 unsigned int i, gap_count;
4771 tree tmp_data_ref;
4773 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4774 Since we scan the chain starting from its first node, their order
4775 corresponds to the order of data-refs in RESULT_CHAIN. */
4776 next_stmt = first_stmt;
4777 gap_count = 1;
4778 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4780 if (!next_stmt)
4781 break;
4783 /* Skip the gaps. Loads created for the gaps will be removed by the dead
4784 code elimination pass later. No need to check for the first stmt in
4785 the group, since it always exists.
4786 GROUP_GAP is the number of steps in elements from the previous
4787 access (if there is no gap GROUP_GAP is 1). We skip loads that
4788 correspond to the gaps. */
4789 if (next_stmt != first_stmt
4790 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4792 gap_count++;
4793 continue;
4796 while (next_stmt)
4798 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4799 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4800 copies, and we put the new vector statement in the first available
4801 RELATED_STMT. */
4802 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4803 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4804 else
4806 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4808 gimple prev_stmt =
4809 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4810 gimple rel_stmt =
4811 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4812 while (rel_stmt)
4814 prev_stmt = rel_stmt;
4815 rel_stmt =
4816 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4819 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4820 new_stmt;
4824 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4825 gap_count = 1;
4826 /* If NEXT_STMT accesses the same DR as the previous statement,
4827 put the same TMP_DATA_REF as its vectorized statement; otherwise
4828 get the next data-ref from RESULT_CHAIN. */
4829 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4830 break;
4835 /* Function vect_can_force_dr_alignment_p.
4837 Returns whether the alignment of a DECL can be forced to be aligned
4838 on ALIGNMENT bit boundary. */
4840 bool
4841 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4843 if (TREE_CODE (decl) != VAR_DECL)
4844 return false;
4846 /* We cannot change alignment of common or external symbols as another
4847 translation unit may contain a definition with lower alignment.
4848 The rules of common symbol linking mean that the definition
4849 will override the common symbol. */
4850 if (DECL_EXTERNAL (decl)
4851 || DECL_COMMON (decl))
4852 return false;
4854 if (TREE_ASM_WRITTEN (decl))
4855 return false;
4857 /* Do not override the alignment as specified by the ABI when the used
4858 attribute is set. */
4859 if (DECL_PRESERVE_P (decl))
4860 return false;
4862 if (TREE_STATIC (decl))
4863 return (alignment <= MAX_OFILE_ALIGNMENT);
4864 else
4865 return (alignment <= MAX_STACK_ALIGNMENT);
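/* For instance, a file-scope "static short a[256]" can usually be promoted
   to vector alignment here (bounded by MAX_OFILE_ALIGNMENT), whereas an
   "extern" or common symbol, a decl already written out, or one carrying
   the "used" attribute keeps whatever alignment it already has.  */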
4869 /* Return whether the data reference DR is supported with respect to its
4870 alignment.
4871 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4872 if it is aligned, i.e., check if it is possible to vectorize it with different
4873 alignment. */
4875 enum dr_alignment_support
4876 vect_supportable_dr_alignment (struct data_reference *dr,
4877 bool check_aligned_accesses)
4879 gimple stmt = DR_STMT (dr);
4880 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4881 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4882 enum machine_mode mode = TYPE_MODE (vectype);
4883 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4884 struct loop *vect_loop = NULL;
4885 bool nested_in_vect_loop = false;
4887 if (aligned_access_p (dr) && !check_aligned_accesses)
4888 return dr_aligned;
4890 if (loop_vinfo)
4892 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4893 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4896 /* Possibly unaligned access. */
4898 /* We can choose between using the implicit realignment scheme (generating
4899 a misaligned_move stmt) and the explicit realignment scheme (generating
4900 aligned loads with a REALIGN_LOAD). There are two variants to the
4901 explicit realignment scheme: optimized, and unoptimized.
4902 We can optimize the realignment only if the step between consecutive
4903 vector loads is equal to the vector size. Since the vector memory
4904 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4905 is guaranteed that the misalignment amount remains the same throughout the
4906 execution of the vectorized loop. Therefore, we can create the
4907 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4908 at the loop preheader.
4910 However, in the case of outer-loop vectorization, when vectorizing a
4911 memory access in the inner-loop nested within the LOOP that is now being
4912 vectorized, while it is guaranteed that the misalignment of the
4913 vectorized memory access will remain the same in different outer-loop
4914 iterations, it is *not* guaranteed that it will remain the same throughout
4915 the execution of the inner-loop. This is because the inner-loop advances
4916 with the original scalar step (and not in steps of VS). If the inner-loop
4917 step happens to be a multiple of VS, then the misalignment remains fixed
4918 and we can use the optimized realignment scheme. For example:
4920 for (i=0; i<N; i++)
4921 for (j=0; j<M; j++)
4922 s += a[i+j];
4924 When vectorizing the i-loop in the above example, the step between
4925 consecutive vector loads is 1, and so the misalignment does not remain
4926 fixed across the execution of the inner-loop, and the realignment cannot
4927 be optimized (as illustrated in the following pseudo vectorized loop):
4929 for (i=0; i<N; i+=4)
4930 for (j=0; j<M; j++){
4931 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4932 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4933 // (assuming that we start from an aligned address).
4936 We therefore have to use the unoptimized realignment scheme:
4938 for (i=0; i<N; i+=4)
4939 for (j=k; j<M; j+=4)
4940 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4941 // that the misalignment of the initial address is
4942 // 0).
4944 The loop can then be vectorized as follows:
4946 for (k=0; k<4; k++){
4947 rt = get_realignment_token (&vp[k]);
4948 for (i=0; i<N; i+=4){
4949 v1 = vp[i+k];
4950 for (j=k; j<M; j+=4){
4951 v2 = vp[i+j+VS-1];
4952 va = REALIGN_LOAD <v1,v2,rt>;
4953 vs += va;
4954 v1 = v2;
4957 } */
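  /* Summarizing the decision below: a read can use one of the explicit
     realignment schemes when the target provides vec_realign_load (and the
     mask-for-load builtin, if that hook is defined); the optimized variant
     additionally requires a loop and, for nested accesses, a step equal to
     the vector size.  Otherwise both reads and writes fall back to the
     target's support_vector_misalignment hook, and finally to
     dr_unaligned_unsupported.  */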
4959 if (DR_IS_READ (dr))
4961 bool is_packed = false;
4962 tree type = (TREE_TYPE (DR_REF (dr)));
4964 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4965 && (!targetm.vectorize.builtin_mask_for_load
4966 || targetm.vectorize.builtin_mask_for_load ()))
4968 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4969 if ((nested_in_vect_loop
4970 && (TREE_INT_CST_LOW (DR_STEP (dr))
4971 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4972 || !loop_vinfo)
4973 return dr_explicit_realign;
4974 else
4975 return dr_explicit_realign_optimized;
4977 if (!known_alignment_for_access_p (dr))
4978 is_packed = not_size_aligned (DR_REF (dr));
4980 if (targetm.vectorize.
4981 support_vector_misalignment (mode, type,
4982 DR_MISALIGNMENT (dr), is_packed))
4983 /* Can't software pipeline the loads, but can at least do them. */
4984 return dr_unaligned_supported;
4986 else
4988 bool is_packed = false;
4989 tree type = (TREE_TYPE (DR_REF (dr)));
4991 if (!known_alignment_for_access_p (dr))
4992 is_packed = not_size_aligned (DR_REF (dr));
4994 if (targetm.vectorize.
4995 support_vector_misalignment (mode, type,
4996 DR_MISALIGNMENT (dr), is_packed))
4997 return dr_unaligned_supported;
5000 /* Unsupported. */
5001 return dr_unaligned_unsupported;