1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
43 #include "expr.h"
44 #include "optabs.h"
46 /* Return true if load- or store-lanes optab OPTAB is implemented for
47 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
49 static bool
50 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
51 tree vectype, unsigned HOST_WIDE_INT count)
53 enum machine_mode mode, array_mode;
54 bool limit_p;
56 mode = TYPE_MODE (vectype);
57 limit_p = !targetm.array_mode_supported_p (mode, count);
58 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
59 MODE_INT, limit_p);
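  /* Illustrative example (not from the original sources): storing COUNT == 4
     V4SI vectors needs a 4 * 128 = 512-bit integer mode; if no such mode is
     available, mode_for_size returns BLKmode and we give up below.  */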
61 if (array_mode == BLKmode)
63 if (vect_print_dump_info (REPORT_DETAILS))
64 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65 GET_MODE_NAME (mode), count);
66 return false;
69 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
71 if (vect_print_dump_info (REPORT_DETAILS))
72 fprintf (vect_dump, "cannot use %s<%s><%s>",
73 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
74 return false;
77 if (vect_print_dump_info (REPORT_DETAILS))
78 fprintf (vect_dump, "can use %s<%s><%s>",
79 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
81 return true;
85 /* Return the smallest scalar part of STMT.
86 This is used to determine the vectype of the stmt. We generally set the
87 vectype according to the type of the result (lhs). For stmts whose
88 result-type is different than the type of the arguments (e.g., demotion,
89 promotion), vectype will be reset appropriately (later). Note that we have
90 to visit the smallest datatype in this function, because that determines the
91 VF. If the smallest datatype in the loop is present only as the rhs of a
92 promotion operation - we'd miss it.
93 Such a case, where a variable of this datatype does not appear in the lhs
94 anywhere in the loop, can only occur if it's an invariant: e.g.:
95 'int_x = (int) short_inv', which we'd expect to have been optimized away by
96 invariant motion. However, we cannot rely on invariant motion to always
97 take invariants out of the loop, and so in the case of promotion we also
98 have to check the rhs.
99 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
100 types. */
102 tree
103 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
104 HOST_WIDE_INT *rhs_size_unit)
106 tree scalar_type = gimple_expr_type (stmt);
107 HOST_WIDE_INT lhs, rhs;
109 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
111 if (is_gimple_assign (stmt)
112 && (gimple_assign_cast_p (stmt)
113 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
114 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
116 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
118 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
119 if (rhs < lhs)
120 scalar_type = rhs_type;
123 *lhs_size_unit = lhs;
124 *rhs_size_unit = rhs;
125 return scalar_type;
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
135 gimple next_stmt = first_stmt;
136 int result = 0;
138 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
139 return -1;
141 while (next_stmt && next_stmt != stmt)
143 result++;
144 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
147 if (next_stmt)
148 return result;
149 else
150 return -1;
154 /* Function vect_insert_into_interleaving_chain.
156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
158 static void
159 vect_insert_into_interleaving_chain (struct data_reference *dra,
160 struct data_reference *drb)
162 gimple prev, next;
163 tree next_init;
164 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
167 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
169 while (next)
171 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
174 /* Insert here. */
175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
177 return;
179 prev = next;
180 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
183 /* We got to the end of the list. Insert here. */
184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
189 /* Function vect_update_interleaving_chain.
 191    For two data-refs DRA and DRB that are part of a chain of interleaved data
 192    accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.
194 There are four possible cases:
195 1. New stmts - both DRA and DRB are not a part of any chain:
196 FIRST_DR = DRB
197 NEXT_DR (DRB) = DRA
198 2. DRB is a part of a chain and DRA is not:
199 no need to update FIRST_DR
200 no need to insert DRB
201 insert DRA according to init
202 3. DRA is a part of a chain and DRB is not:
203 if (init of FIRST_DR > init of DRB)
204 FIRST_DR = DRB
205 NEXT(FIRST_DR) = previous FIRST_DR
206 else
207 insert DRB according to its init
208 4. both DRA and DRB are in some interleaving chains:
209 choose the chain with the smallest init of FIRST_DR
210 insert the nodes of the second chain into the first one. */
212 static void
213 vect_update_interleaving_chain (struct data_reference *drb,
214 struct data_reference *dra)
216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218 tree next_init, init_dra_chain, init_drb_chain;
219 gimple first_a, first_b;
220 tree node_init;
221 gimple node, prev, next, first_stmt;
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
229 return;
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra, drb);
238 return;
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
246 old_first_stmt)));
247 gimple tmp;
249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp = old_first_stmt;
259 while (tmp)
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
265 else
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb, dra);
269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
271 return;
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277 if (first_a == first_b)
278 return;
279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
284 /* Insert the nodes of DRA chain into the DRB chain.
285 After inserting a node, continue from this node of the DRB chain (don't
 286          start from the beginning).  */
287 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289 first_stmt = first_b;
291 else
293 /* Insert the nodes of DRB chain into the DRA chain.
294 After inserting a node, continue from this node of the DRA chain (don't
 295          start from the beginning).  */
296 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298 first_stmt = first_a;
301 while (node)
303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
305 while (next)
307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 if (tree_int_cst_compare (next_init, node_init) > 0)
310 /* Insert here. */
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
313 prev = node;
314 break;
316 prev = next;
317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
319 if (!next)
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
324 prev = node;
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
331 /* Check dependence between DRA and DRB for basic block vectorization.
 332    If the accesses share the same bases and offsets, we can compare their initial
333 constant offsets to decide whether they differ or not. In case of a read-
334 write dependence we check that the load is before the store to ensure that
335 vectorization will not change the order of the accesses. */
337 static bool
338 vect_drs_dependent_in_basic_block (struct data_reference *dra,
339 struct data_reference *drb)
341 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
342 gimple earlier_stmt;
344 /* We only call this function for pairs of loads and stores, but we verify
345 it here. */
346 if (DR_IS_READ (dra) == DR_IS_READ (drb))
348 if (DR_IS_READ (dra))
349 return false;
350 else
351 return true;
354 /* Check that the data-refs have same bases and offsets. If not, we can't
355 determine if they are dependent. */
356 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357 || !dr_equal_offsets_p (dra, drb))
358 return true;
360 /* Check the types. */
361 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
364 if (type_size_a != type_size_b
365 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366 TREE_TYPE (DR_REF (drb))))
367 return true;
369 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
372 /* Two different locations - no dependence. */
373 if (init_a != init_b)
374 return false;
376 /* We have a read-write dependence. Check that the load is before the store.
377 When we vectorize basic blocks, vector load can be only before
378 corresponding scalar load, and vector store can be only after its
 379      corresponding scalar store.  So the order of the accesses is preserved in
380 case the load is before the store. */
381 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
383 return false;
385 return true;
389 /* Function vect_check_interleaving.
391 Check if DRA and DRB are a part of interleaving. In case they are, insert
392 DRA and DRB in an interleaving chain. */
394 static bool
395 vect_check_interleaving (struct data_reference *dra,
396 struct data_reference *drb)
398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
 400   /* Check that the data-refs have the same first location (except init) and they
401 are both either store or load (not load and store). */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403 || !dr_equal_offsets_p (dra, drb)
404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405 || DR_IS_READ (dra) != DR_IS_READ (drb))
406 return false;
408 /* Check:
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between
412 data-refs' inits. */
413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
416 if (type_size_a != type_size_b
417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419 TREE_TYPE (DR_REF (drb))))
420 return false;
422 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424 step = TREE_INT_CST_LOW (DR_STEP (dra));
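  /* Worked example (illustrative): with type_size_a == 4, step == 16,
     init_a == 8 and init_b == 0, diff_mod_size is 8 % 4 == 0 and 8 <= 16,
     so the refs interleave and DRB is accessed before DRA.  */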
426 if (init_a > init_b)
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size = (init_a - init_b) % type_size_a;
432 if (step && (init_a - init_b) > step)
433 return false;
435 if (diff_mod_size == 0)
437 vect_update_interleaving_chain (drb, dra);
438 if (vect_print_dump_info (REPORT_DR_DETAILS))
440 fprintf (vect_dump, "Detected interleaving ");
441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 fprintf (vect_dump, " and ");
443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
445 return true;
448 else
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size = (init_b - init_a) % type_size_a;
454 if (step && (init_b - init_a) > step)
455 return false;
457 if (diff_mod_size == 0)
459 vect_update_interleaving_chain (dra, drb);
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 fprintf (vect_dump, "Detected interleaving ");
463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
467 return true;
471 return false;
474 /* Check if data references pointed by DR_I and DR_J are same or
475 belong to same interleaving group. Return FALSE if drs are
476 different, otherwise return TRUE. */
478 static bool
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
481 gimple stmt_i = DR_STMT (dr_i);
482 gimple stmt_j = DR_STMT (dr_j);
484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
489 return true;
490 else
491 return false;
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
497 static bool
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
504 return true;
505 else
506 return false;
509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
510 tested at run-time. Return TRUE if DDR was successfully inserted.
511 Return false if versioning is not supported. */
513 static bool
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
519 return false;
521 if (vect_print_dump_info (REPORT_DR_DETAILS))
523 fprintf (vect_dump, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525 fprintf (vect_dump, " and ");
526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
529 if (optimize_loop_nest_for_size_p (loop))
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning not supported when optimizing for size.");
533 return false;
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
537 if (loop->inner)
539 if (vect_print_dump_info (REPORT_DR_DETAILS))
540 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
541 return false;
544 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
545 return true;
549 /* Function vect_analyze_data_ref_dependence.
551 Return TRUE if there (might) exist a dependence between a memory-reference
 552    DRA and a memory-reference DRB.  When versioning for alias can check the
 553    dependence at run time, return FALSE.  Adjust *MAX_VF according to
554 the data dependence. */
556 static bool
557 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
558 loop_vec_info loop_vinfo, int *max_vf)
560 unsigned int i;
561 struct loop *loop = NULL;
562 struct data_reference *dra = DDR_A (ddr);
563 struct data_reference *drb = DDR_B (ddr);
564 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
565 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
566 lambda_vector dist_v;
567 unsigned int loop_depth;
569 /* Don't bother to analyze statements marked as unvectorizable. */
570 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
571 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
572 return false;
574 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
576 /* Independent data accesses. */
577 vect_check_interleaving (dra, drb);
578 return false;
581 if (loop_vinfo)
582 loop = LOOP_VINFO_LOOP (loop_vinfo);
584 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
585 return false;
587 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
589 gimple earlier_stmt;
591 if (loop_vinfo)
593 if (vect_print_dump_info (REPORT_DR_DETAILS))
595 fprintf (vect_dump, "versioning for alias required: "
596 "can't determine dependence between ");
597 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
598 fprintf (vect_dump, " and ");
599 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
602 /* Add to list of ddrs that need to be tested at run-time. */
603 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
 606       /* When vectorizing a basic block, an unknown dependence can still mean
607 strided access. */
608 if (vect_check_interleaving (dra, drb))
609 return false;
611 /* Read-read is OK (we need this check here, after checking for
612 interleaving). */
613 if (DR_IS_READ (dra) && DR_IS_READ (drb))
614 return false;
616 if (vect_print_dump_info (REPORT_DR_DETAILS))
618 fprintf (vect_dump, "can't determine dependence between ");
619 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
620 fprintf (vect_dump, " and ");
621 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
624 /* We do not vectorize basic blocks with write-write dependencies. */
625 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
626 return true;
628 /* Check that it's not a load-after-store dependence. */
629 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
630 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
631 return true;
633 return false;
636 /* Versioning for alias is not yet supported for basic block SLP, and
 637      dependence distance is not applicable; hence, in case of known data
638 dependence, basic block vectorization is impossible for now. */
639 if (!loop_vinfo)
641 if (dra != drb && vect_check_interleaving (dra, drb))
642 return false;
644 if (vect_print_dump_info (REPORT_DR_DETAILS))
646 fprintf (vect_dump, "determined dependence between ");
647 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
648 fprintf (vect_dump, " and ");
649 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
 652       /* Do not vectorize basic blocks with write-write dependences.  */
653 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
654 return true;
656 /* Check if this dependence is allowed in basic block vectorization. */
657 return vect_drs_dependent_in_basic_block (dra, drb);
660 /* Loop-based vectorization and known data dependence. */
661 if (DDR_NUM_DIST_VECTS (ddr) == 0)
663 if (vect_print_dump_info (REPORT_DR_DETAILS))
665 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
666 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
667 fprintf (vect_dump, " and ");
668 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
670 /* Add to list of ddrs that need to be tested at run-time. */
671 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
674 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
675 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
677 int dist = dist_v[loop_depth];
679 if (vect_print_dump_info (REPORT_DR_DETAILS))
680 fprintf (vect_dump, "dependence distance = %d.", dist);
682 if (dist == 0)
684 if (vect_print_dump_info (REPORT_DR_DETAILS))
686 fprintf (vect_dump, "dependence distance == 0 between ");
687 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
688 fprintf (vect_dump, " and ");
689 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
692 /* For interleaving, mark that there is a read-write dependency if
 693              necessary.  We have already checked that one of the data-refs is a store.  */
694 if (DR_IS_READ (dra))
695 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
696 else
698 if (DR_IS_READ (drb))
699 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
702 continue;
705 if (dist > 0 && DDR_REVERSED_P (ddr))
 707           /* If DDR_REVERSED_P is set, the order of the data-refs in DDR was
708 reversed (to make distance vector positive), and the actual
709 distance is negative. */
710 if (vect_print_dump_info (REPORT_DR_DETAILS))
711 fprintf (vect_dump, "dependence distance negative.");
712 continue;
715 if (abs (dist) >= 2
716 && abs (dist) < *max_vf)
718 /* The dependence distance requires reduction of the maximal
719 vectorization factor. */
720 *max_vf = abs (dist);
721 if (vect_print_dump_info (REPORT_DR_DETAILS))
722 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
723 *max_vf);
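          /* E.g. (illustrative), a dependence distance of 3 with *max_vf 8
             lowers *max_vf to 3, so that no single vector ever spans both
             ends of the dependence.  */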
726 if (abs (dist) >= *max_vf)
728 /* Dependence distance does not create dependence, as far as
729 vectorization is concerned, in this case. */
730 if (vect_print_dump_info (REPORT_DR_DETAILS))
731 fprintf (vect_dump, "dependence distance >= VF.");
732 continue;
735 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
737 fprintf (vect_dump, "not vectorized, possible dependence "
738 "between data-refs ");
739 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
740 fprintf (vect_dump, " and ");
741 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
744 return true;
747 return false;
750 /* Function vect_analyze_data_ref_dependences.
752 Examine all the data references in the loop, and make sure there do not
753 exist any data dependences between them. Set *MAX_VF according to
754 the maximum vectorization factor the data dependences allow. */
756 bool
757 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
758 bb_vec_info bb_vinfo, int *max_vf)
760 unsigned int i;
761 VEC (ddr_p, heap) *ddrs = NULL;
762 struct data_dependence_relation *ddr;
764 if (vect_print_dump_info (REPORT_DETAILS))
765 fprintf (vect_dump, "=== vect_analyze_dependences ===");
767 if (loop_vinfo)
768 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
769 else
770 ddrs = BB_VINFO_DDRS (bb_vinfo);
772 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
773 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
774 return false;
776 return true;
780 /* Function vect_compute_data_ref_alignment
782 Compute the misalignment of the data reference DR.
784 Output:
785 1. If during the misalignment computation it is found that the data reference
786 cannot be vectorized then false is returned.
787 2. DR_MISALIGNMENT (DR) is defined.
789 FOR NOW: No analysis is actually performed. Misalignment is calculated
790 only for trivial cases. TODO. */
792 static bool
793 vect_compute_data_ref_alignment (struct data_reference *dr)
795 gimple stmt = DR_STMT (dr);
796 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
797 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
798 struct loop *loop = NULL;
799 tree ref = DR_REF (dr);
800 tree vectype;
801 tree base, base_addr;
802 bool base_aligned;
803 tree misalign;
804 tree aligned_to, alignment;
806 if (vect_print_dump_info (REPORT_DETAILS))
807 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
809 if (loop_vinfo)
810 loop = LOOP_VINFO_LOOP (loop_vinfo);
812 /* Initialize misalignment to unknown. */
813 SET_DR_MISALIGNMENT (dr, -1);
815 misalign = DR_INIT (dr);
816 aligned_to = DR_ALIGNED_TO (dr);
817 base_addr = DR_BASE_ADDRESS (dr);
818 vectype = STMT_VINFO_VECTYPE (stmt_info);
820 /* In case the dataref is in an inner-loop of the loop that is being
821 vectorized (LOOP), we use the base and misalignment information
822 relative to the outer-loop (LOOP). This is ok only if the misalignment
823 stays the same throughout the execution of the inner-loop, which is why
 824      we have to check that the stride of the dataref in the inner-loop is a
 825      multiple of the vector size.  */
826 if (loop && nested_in_vect_loop_p (loop, stmt))
828 tree step = DR_STEP (dr);
829 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
831 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
833 if (vect_print_dump_info (REPORT_ALIGNMENT))
834 fprintf (vect_dump, "inner step divides the vector-size.");
835 misalign = STMT_VINFO_DR_INIT (stmt_info);
836 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
837 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
839 else
841 if (vect_print_dump_info (REPORT_ALIGNMENT))
842 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
843 misalign = NULL_TREE;
847 base = build_fold_indirect_ref (base_addr);
848 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
850 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
851 || !misalign)
853 if (vect_print_dump_info (REPORT_ALIGNMENT))
855 fprintf (vect_dump, "Unknown alignment for access: ");
856 print_generic_expr (vect_dump, base, TDF_SLIM);
858 return true;
861 if ((DECL_P (base)
862 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
863 alignment) >= 0)
864 || (TREE_CODE (base_addr) == SSA_NAME
865 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
866 TREE_TYPE (base_addr)))),
867 alignment) >= 0)
868 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
869 base_aligned = true;
870 else
871 base_aligned = false;
873 if (!base_aligned)
875 /* Do not change the alignment of global variables if
876 flag_section_anchors is enabled. */
877 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
878 || (TREE_STATIC (base) && flag_section_anchors))
880 if (vect_print_dump_info (REPORT_DETAILS))
882 fprintf (vect_dump, "can't force alignment of ref: ");
883 print_generic_expr (vect_dump, ref, TDF_SLIM);
885 return true;
888 /* Force the alignment of the decl.
889 NOTE: This is the only change to the code we make during
890 the analysis phase, before deciding to vectorize the loop. */
891 if (vect_print_dump_info (REPORT_DETAILS))
893 fprintf (vect_dump, "force alignment of ");
894 print_generic_expr (vect_dump, ref, TDF_SLIM);
897 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
898 DECL_USER_ALIGN (base) = 1;
901 /* At this point we assume that the base is aligned. */
902 gcc_assert (base_aligned
903 || (TREE_CODE (base) == VAR_DECL
904 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
 906   /* If this is a backward running DR then the first access in the larger
907 vectype actually is N-1 elements before the address in the DR.
908 Adjust misalign accordingly. */
909 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
911 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
912 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
913 otherwise we wouldn't be here. */
914 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
915 /* PLUS because DR_STEP was negative. */
916 misalign = size_binop (PLUS_EXPR, misalign, offset);
919 /* Modulo alignment. */
920 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
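  /* E.g. (illustrative), for a 16-byte vector type a DR_INIT of 20 yields a
     misalignment of 4; for a backward V4SI access with DR_INIT 0 and step -4
     the adjusted value -12 floor-mods to 4 as well.  */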
922 if (!host_integerp (misalign, 1))
924 /* Negative or overflowed misalignment value. */
925 if (vect_print_dump_info (REPORT_DETAILS))
926 fprintf (vect_dump, "unexpected misalign value");
927 return false;
930 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
932 if (vect_print_dump_info (REPORT_DETAILS))
934 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
935 print_generic_expr (vect_dump, ref, TDF_SLIM);
938 return true;
942 /* Function vect_compute_data_refs_alignment
944 Compute the misalignment of data references in the loop.
945 Return FALSE if a data reference is found that cannot be vectorized. */
947 static bool
948 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
949 bb_vec_info bb_vinfo)
951 VEC (data_reference_p, heap) *datarefs;
952 struct data_reference *dr;
953 unsigned int i;
955 if (loop_vinfo)
956 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
957 else
958 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
960 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
961 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
962 && !vect_compute_data_ref_alignment (dr))
964 if (bb_vinfo)
966 /* Mark unsupported statement as unvectorizable. */
967 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
968 continue;
970 else
971 return false;
974 return true;
978 /* Function vect_update_misalignment_for_peel
980 DR - the data reference whose misalignment is to be adjusted.
981 DR_PEEL - the data reference whose misalignment is being made
982 zero in the vector loop by the peel.
983 NPEEL - the number of iterations in the peel loop if the misalignment
984 of DR_PEEL is known at compile time. */
986 static void
987 vect_update_misalignment_for_peel (struct data_reference *dr,
988 struct data_reference *dr_peel, int npeel)
990 unsigned int i;
991 VEC(dr_p,heap) *same_align_drs;
992 struct data_reference *current_dr;
993 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
994 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
995 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
996 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
998 /* For interleaved data accesses the step in the loop must be multiplied by
999 the size of the interleaving group. */
1000 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1001 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1002 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1003 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1005 /* It can be assumed that the data refs with the same alignment as dr_peel
1006 are aligned in the vector loop. */
1007 same_align_drs
1008 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1009 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1011 if (current_dr != dr)
1012 continue;
1013 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1014 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1015 SET_DR_MISALIGNMENT (dr, 0);
1016 return;
1019 if (known_alignment_for_access_p (dr)
1020 && known_alignment_for_access_p (dr_peel))
1022 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1023 int misal = DR_MISALIGNMENT (dr);
1024 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1025 misal += negative ? -npeel * dr_size : npeel * dr_size;
1026 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
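      /* E.g. (illustrative), with a 16-byte vector, dr_size 4, npeel 3 and a
         forward step, a misalignment of 4 becomes (4 + 3 * 4) & 15 == 0.  */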
1027 SET_DR_MISALIGNMENT (dr, misal);
1028 return;
1031 if (vect_print_dump_info (REPORT_DETAILS))
1032 fprintf (vect_dump, "Setting misalignment to -1.");
1033 SET_DR_MISALIGNMENT (dr, -1);
1037 /* Function vect_verify_datarefs_alignment
1039 Return TRUE if all data references in the loop can be
1040 handled with respect to alignment. */
1042 bool
1043 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1045 VEC (data_reference_p, heap) *datarefs;
1046 struct data_reference *dr;
1047 enum dr_alignment_support supportable_dr_alignment;
1048 unsigned int i;
1050 if (loop_vinfo)
1051 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1052 else
1053 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1055 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1057 gimple stmt = DR_STMT (dr);
1058 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1060 /* For interleaving, only the alignment of the first access matters.
1061 Skip statements marked as not vectorizable. */
1062 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1063 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1064 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1065 continue;
1067 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1068 if (!supportable_dr_alignment)
1070 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1072 if (DR_IS_READ (dr))
1073 fprintf (vect_dump,
1074 "not vectorized: unsupported unaligned load.");
1075 else
1076 fprintf (vect_dump,
1077 "not vectorized: unsupported unaligned store.");
1079 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1081 return false;
1083 if (supportable_dr_alignment != dr_aligned
1084 && vect_print_dump_info (REPORT_ALIGNMENT))
1085 fprintf (vect_dump, "Vectorizing an unaligned access.");
1087 return true;
1091 /* Function vector_alignment_reachable_p
1093 Return true if vector alignment for DR is reachable by peeling
1094 a few loop iterations. Return false otherwise. */
1096 static bool
1097 vector_alignment_reachable_p (struct data_reference *dr)
1099 gimple stmt = DR_STMT (dr);
1100 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1101 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1103 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
 1105       /* For interleaved accesses we peel only if the number of iterations in
 1106          the prolog loop (VF - misalignment) is a multiple of the
 1107          number of interleaved accesses.  */
1108 int elem_size, mis_in_elements;
1109 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1111 /* FORNOW: handle only known alignment. */
1112 if (!known_alignment_for_access_p (dr))
1113 return false;
1115 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1116 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1118 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1119 return false;
 1122   /* If misalignment is known at compile time then allow peeling
1123 only if natural alignment is reachable through peeling. */
1124 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1126 HOST_WIDE_INT elmsize =
1127 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1128 if (vect_print_dump_info (REPORT_DETAILS))
1130 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1131 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1133 if (DR_MISALIGNMENT (dr) % elmsize)
1135 if (vect_print_dump_info (REPORT_DETAILS))
1136 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1137 return false;
1141 if (!known_alignment_for_access_p (dr))
1143 tree type = (TREE_TYPE (DR_REF (dr)));
1144 tree ba = DR_BASE_OBJECT (dr);
1145 bool is_packed = false;
1147 if (ba)
1148 is_packed = contains_packed_reference (ba);
1150 if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1151 is_packed = true;
1153 if (vect_print_dump_info (REPORT_DETAILS))
1154 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1155 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1156 return true;
1157 else
1158 return false;
1161 return true;
1165 /* Calculate the cost of the memory access represented by DR. */
1167 static void
1168 vect_get_data_access_cost (struct data_reference *dr,
1169 unsigned int *inside_cost,
1170 unsigned int *outside_cost)
1172 gimple stmt = DR_STMT (dr);
1173 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1174 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1175 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1176 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1177 int ncopies = vf / nunits;
1178 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1180 if (!supportable_dr_alignment)
1181 *inside_cost = VECT_MAX_COST;
1182 else
1184 if (DR_IS_READ (dr))
1185 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1186 else
1187 vect_get_store_cost (dr, ncopies, inside_cost);
1190 if (vect_print_dump_info (REPORT_COST))
1191 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1192 "outside_cost = %d.", *inside_cost, *outside_cost);
1196 static hashval_t
1197 vect_peeling_hash (const void *elem)
1199 const struct _vect_peel_info *peel_info;
1201 peel_info = (const struct _vect_peel_info *) elem;
1202 return (hashval_t) peel_info->npeel;
1206 static int
1207 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1209 const struct _vect_peel_info *a, *b;
1211 a = (const struct _vect_peel_info *) elem1;
1212 b = (const struct _vect_peel_info *) elem2;
1213 return (a->npeel == b->npeel);
1217 /* Insert DR into peeling hash table with NPEEL as key. */
1219 static void
1220 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1221 int npeel)
1223 struct _vect_peel_info elem, *slot;
1224 void **new_slot;
1225 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1227 elem.npeel = npeel;
1228 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1229 &elem);
1230 if (slot)
1231 slot->count++;
1232 else
1234 slot = XNEW (struct _vect_peel_info);
1235 slot->npeel = npeel;
1236 slot->dr = dr;
1237 slot->count = 1;
1238 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1239 INSERT);
1240 *new_slot = slot;
1243 if (!supportable_dr_alignment && !flag_vect_cost_model)
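    /* Illustrative note (added): when the cost model is disabled, the chooser
       below picks the most frequent peeling amount, so boosting the count here
       makes a peeling option that aligns an access unsupported when misaligned
       effectively always win.  */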
1244 slot->count += VECT_MAX_COST;
1248 /* Traverse peeling hash table to find peeling option that aligns maximum
1249 number of data accesses. */
1251 static int
1252 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1254 vect_peel_info elem = (vect_peel_info) *slot;
1255 vect_peel_extended_info max = (vect_peel_extended_info) data;
1257 if (elem->count > max->peel_info.count
1258 || (elem->count == max->peel_info.count
1259 && max->peel_info.npeel > elem->npeel))
1261 max->peel_info.npeel = elem->npeel;
1262 max->peel_info.count = elem->count;
1263 max->peel_info.dr = elem->dr;
1266 return 1;
1270 /* Traverse peeling hash table and calculate cost for each peeling option.
1271 Find the one with the lowest cost. */
1273 static int
1274 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1276 vect_peel_info elem = (vect_peel_info) *slot;
1277 vect_peel_extended_info min = (vect_peel_extended_info) data;
1278 int save_misalignment, dummy;
1279 unsigned int inside_cost = 0, outside_cost = 0, i;
1280 gimple stmt = DR_STMT (elem->dr);
1281 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1282 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1283 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1284 struct data_reference *dr;
1286 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1288 stmt = DR_STMT (dr);
1289 stmt_info = vinfo_for_stmt (stmt);
1290 /* For interleaving, only the alignment of the first access
1291 matters. */
1292 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1293 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1294 continue;
1296 save_misalignment = DR_MISALIGNMENT (dr);
1297 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1298 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1299 SET_DR_MISALIGNMENT (dr, save_misalignment);
1302 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1303 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1305 if (inside_cost < min->inside_cost
1306 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1308 min->inside_cost = inside_cost;
1309 min->outside_cost = outside_cost;
1310 min->peel_info.dr = elem->dr;
1311 min->peel_info.npeel = elem->npeel;
1314 return 1;
1318 /* Choose best peeling option by traversing peeling hash table and either
1319 choosing an option with the lowest cost (if cost model is enabled) or the
1320 option that aligns as many accesses as possible. */
1322 static struct data_reference *
1323 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1324 unsigned int *npeel)
1326 struct _vect_peel_extended_info res;
1328 res.peel_info.dr = NULL;
1330 if (flag_vect_cost_model)
1332 res.inside_cost = INT_MAX;
1333 res.outside_cost = INT_MAX;
1334 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1335 vect_peeling_hash_get_lowest_cost, &res);
1337 else
1339 res.peel_info.count = 0;
1340 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1341 vect_peeling_hash_get_most_frequent, &res);
1344 *npeel = res.peel_info.npeel;
1345 return res.peel_info.dr;
1349 /* Function vect_enhance_data_refs_alignment
1351 This pass will use loop versioning and loop peeling in order to enhance
1352 the alignment of data references in the loop.
1354 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1355 original loop is to be vectorized. Any other loops that are created by
 1356    the transformations performed in this pass are not supposed to be
1357 vectorized. This restriction will be relaxed.
1359 This pass will require a cost model to guide it whether to apply peeling
1360 or versioning or a combination of the two. For example, the scheme that
 1361    Intel uses when given a loop with several memory accesses is as follows:
 1362    choose one memory access ('p') whose alignment you want to force by
 1363    peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
1364 other accesses are not necessarily aligned, or (2) use loop versioning to
1365 generate one loop in which all accesses are aligned, and another loop in
1366 which only 'p' is necessarily aligned.
1368 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1369 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1370 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1372 Devising a cost model is the most critical aspect of this work. It will
1373 guide us on which access to peel for, whether to use loop versioning, how
1374 many versions to create, etc. The cost model will probably consist of
1375 generic considerations as well as target specific considerations (on
1376 powerpc for example, misaligned stores are more painful than misaligned
1377 loads).
1379 Here are the general steps involved in alignment enhancements:
1381 -- original loop, before alignment analysis:
1382 for (i=0; i<N; i++){
1383 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1384 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1387 -- After vect_compute_data_refs_alignment:
1388 for (i=0; i<N; i++){
1389 x = q[i]; # DR_MISALIGNMENT(q) = 3
1390 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1393 -- Possibility 1: we do loop versioning:
1394 if (p is aligned) {
1395 for (i=0; i<N; i++){ # loop 1A
1396 x = q[i]; # DR_MISALIGNMENT(q) = 3
1397 p[i] = y; # DR_MISALIGNMENT(p) = 0
1400 else {
1401 for (i=0; i<N; i++){ # loop 1B
1402 x = q[i]; # DR_MISALIGNMENT(q) = 3
1403 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1407 -- Possibility 2: we do loop peeling:
1408 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1409 x = q[i];
1410 p[i] = y;
1412 for (i = 3; i < N; i++){ # loop 2A
1413 x = q[i]; # DR_MISALIGNMENT(q) = 0
1414 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1417 -- Possibility 3: combination of loop peeling and versioning:
1418 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1419 x = q[i];
1420 p[i] = y;
1422 if (p is aligned) {
1423 for (i = 3; i<N; i++){ # loop 3A
1424 x = q[i]; # DR_MISALIGNMENT(q) = 0
1425 p[i] = y; # DR_MISALIGNMENT(p) = 0
1428 else {
1429 for (i = 3; i<N; i++){ # loop 3B
1430 x = q[i]; # DR_MISALIGNMENT(q) = 0
1431 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1435 These loops are later passed to loop_transform to be vectorized. The
1436 vectorizer will use the alignment information to guide the transformation
1437 (whether to generate regular loads/stores, or with special handling for
1438 misalignment). */
1440 bool
1441 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1443 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1444 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1445 enum dr_alignment_support supportable_dr_alignment;
1446 struct data_reference *dr0 = NULL, *first_store = NULL;
1447 struct data_reference *dr;
1448 unsigned int i, j;
1449 bool do_peeling = false;
1450 bool do_versioning = false;
1451 bool stat;
1452 gimple stmt;
1453 stmt_vec_info stmt_info;
1454 int vect_versioning_for_alias_required;
1455 unsigned int npeel = 0;
1456 bool all_misalignments_unknown = true;
1457 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1458 unsigned possible_npeel_number = 1;
1459 tree vectype;
1460 unsigned int nelements, mis, same_align_drs_max = 0;
1462 if (vect_print_dump_info (REPORT_DETAILS))
1463 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1465 /* While cost model enhancements are expected in the future, the high level
1466 view of the code at this time is as follows:
1468 A) If there is a misaligned access then see if peeling to align
1469 this access can make all data references satisfy
1470 vect_supportable_dr_alignment. If so, update data structures
1471 as needed and return true.
1473 B) If peeling wasn't possible and there is a data reference with an
1474 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1475 then see if loop versioning checks can be used to make all data
1476 references satisfy vect_supportable_dr_alignment. If so, update
1477 data structures as needed and return true.
1479 C) If neither peeling nor versioning were successful then return false if
1480 any data reference does not satisfy vect_supportable_dr_alignment.
1482 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1484 Note, Possibility 3 above (which is peeling and versioning together) is not
1485 being done at this time. */
1487 /* (1) Peeling to force alignment. */
1489 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1490 Considerations:
1491 + How many accesses will become aligned due to the peeling
1492 - How many accesses will become unaligned due to the peeling,
1493 and the cost of misaligned accesses.
1494 - The cost of peeling (the extra runtime checks, the increase
1495 in code size). */
1497 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1499 stmt = DR_STMT (dr);
1500 stmt_info = vinfo_for_stmt (stmt);
1502 if (!STMT_VINFO_RELEVANT (stmt_info))
1503 continue;
1505 /* For interleaving, only the alignment of the first access
1506 matters. */
1507 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1508 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1509 continue;
1511 /* For invariant accesses there is nothing to enhance. */
1512 if (integer_zerop (DR_STEP (dr)))
1513 continue;
1515 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1516 do_peeling = vector_alignment_reachable_p (dr);
1517 if (do_peeling)
1519 if (known_alignment_for_access_p (dr))
1521 unsigned int npeel_tmp;
1522 bool negative = tree_int_cst_compare (DR_STEP (dr),
1523 size_zero_node) < 0;
1525 /* Save info about DR in the hash table. */
1526 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1527 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1528 htab_create (1, vect_peeling_hash,
1529 vect_peeling_hash_eq, free);
1531 vectype = STMT_VINFO_VECTYPE (stmt_info);
1532 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1533 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1534 TREE_TYPE (DR_REF (dr))));
1535 npeel_tmp = (negative
1536 ? (mis - nelements) : (nelements - mis))
1537 & (nelements - 1);
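              /* E.g. (illustrative), for V4SI (nelements 4, element size 4)
                 and a forward access misaligned by 8 bytes, mis is 2 elements
                 and npeel_tmp is (4 - 2) & 3 == 2 peeled iterations.  */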
1539 /* For multiple types, it is possible that the bigger type access
1540 will have more than one peeling option. E.g., a loop with two
1541 types: one of size (vector size / 4), and the other one of
 1542              size (vector size / 8).  The vectorization factor will be 8.  If both
 1543              accesses are misaligned by 3, the first one needs one scalar
 1544              iteration to be aligned, and the second one needs 5.  But the
 1545              first one will also be aligned by peeling 5 scalar
 1546              iterations, and in that case both accesses will be aligned.
 1547              Hence, except for the immediate peeling amount, we also want
 1548              to try to add a full vector size, while we don't exceed the
 1549              vectorization factor.
 1550              We do this automatically for the cost model, since we calculate the cost
1551 for every peeling option. */
1552 if (!flag_vect_cost_model)
1553 possible_npeel_number = vf /nelements;
1555 /* Handle the aligned case. We may decide to align some other
1556 access, making DR unaligned. */
1557 if (DR_MISALIGNMENT (dr) == 0)
1559 npeel_tmp = 0;
1560 if (!flag_vect_cost_model)
1561 possible_npeel_number++;
1564 for (j = 0; j < possible_npeel_number; j++)
1566 gcc_assert (npeel_tmp <= vf);
1567 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1568 npeel_tmp += nelements;
1571 all_misalignments_unknown = false;
1572 /* Data-ref that was chosen for the case that all the
1573 misalignments are unknown is not relevant anymore, since we
1574 have a data-ref with known alignment. */
1575 dr0 = NULL;
1577 else
 1579          /* If we don't know all the misalignment values, we prefer
 1580             peeling for the data-ref that has the maximum number of data-refs
 1581             with the same alignment, unless the target prefers to align
 1582             stores over loads.  */
1583 if (all_misalignments_unknown)
1585 if (same_align_drs_max < VEC_length (dr_p,
1586 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1587 || !dr0)
1589 same_align_drs_max = VEC_length (dr_p,
1590 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1591 dr0 = dr;
1594 if (!first_store && DR_IS_WRITE (dr))
1595 first_store = dr;
1598 /* If there are both known and unknown misaligned accesses in the
1599 loop, we choose peeling amount according to the known
1600 accesses. */
1603 if (!supportable_dr_alignment)
1605 dr0 = dr;
1606 if (!first_store && DR_IS_WRITE (dr))
1607 first_store = dr;
1611 else
1613 if (!aligned_access_p (dr))
1615 if (vect_print_dump_info (REPORT_DETAILS))
1616 fprintf (vect_dump, "vector alignment may not be reachable");
1618 break;
1623 vect_versioning_for_alias_required
1624 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1626 /* Temporarily, if versioning for alias is required, we disable peeling
1627 until we support peeling and versioning. Often peeling for alignment
1628 will require peeling for loop-bound, which in turn requires that we
1629 know how to adjust the loop ivs after the loop. */
1630 if (vect_versioning_for_alias_required
1631 || !vect_can_advance_ivs_p (loop_vinfo)
1632 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1633 do_peeling = false;
1635 if (do_peeling && all_misalignments_unknown
1636 && vect_supportable_dr_alignment (dr0, false))
 1639       /* Check whether the target prefers stores over loads, i.e., if
 1640          misaligned stores are more expensive than misaligned loads (taking
 1641          drs with the same alignment into account).  */
1642 if (first_store && DR_IS_READ (dr0))
1644 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1645 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1646 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1647 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1649 vect_get_data_access_cost (dr0, &load_inside_cost,
1650 &load_outside_cost);
1651 vect_get_data_access_cost (first_store, &store_inside_cost,
1652 &store_outside_cost);
1654 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1655 aligning the load DR0). */
1656 load_inside_penalty = store_inside_cost;
1657 load_outside_penalty = store_outside_cost;
1658 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1659 (vinfo_for_stmt (DR_STMT (first_store))),
1660 i, dr);
1661 i++)
1662 if (DR_IS_READ (dr))
1664 load_inside_penalty += load_inside_cost;
1665 load_outside_penalty += load_outside_cost;
1667 else
1669 load_inside_penalty += store_inside_cost;
1670 load_outside_penalty += store_outside_cost;
1673 /* Calculate the penalty for leaving DR0 unaligned (by
1674 aligning the FIRST_STORE). */
1675 store_inside_penalty = load_inside_cost;
1676 store_outside_penalty = load_outside_cost;
1677 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1678 (vinfo_for_stmt (DR_STMT (dr0))),
1679 i, dr);
1680 i++)
1681 if (DR_IS_READ (dr))
1683 store_inside_penalty += load_inside_cost;
1684 store_outside_penalty += load_outside_cost;
1686 else
1688 store_inside_penalty += store_inside_cost;
1689 store_outside_penalty += store_outside_cost;
1692 if (load_inside_penalty > store_inside_penalty
1693 || (load_inside_penalty == store_inside_penalty
1694 && load_outside_penalty > store_outside_penalty))
1695 dr0 = first_store;
1698 /* In case there are only loads with different unknown misalignments, use
1699 peeling only if it may help to align other accesses in the loop. */
1700 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1701 (vinfo_for_stmt (DR_STMT (dr0))))
1702 && vect_supportable_dr_alignment (dr0, false)
1703 != dr_unaligned_supported)
1704 do_peeling = false;
1707 if (do_peeling && !dr0)
1709 /* Peeling is possible, but there is no data access that is not supported
1710 unless aligned. So we try to choose the best possible peeling. */
1712 /* We should get here only if there are drs with known misalignment. */
1713 gcc_assert (!all_misalignments_unknown);
1715 /* Choose the best peeling from the hash table. */
1716 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1717 if (!dr0 || !npeel)
1718 do_peeling = false;
1721 if (do_peeling)
1723 stmt = DR_STMT (dr0);
1724 stmt_info = vinfo_for_stmt (stmt);
1725 vectype = STMT_VINFO_VECTYPE (stmt_info);
1726 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1728 if (known_alignment_for_access_p (dr0))
1730 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1731 size_zero_node) < 0;
1732 if (!npeel)
1734 /* Since it's known at compile time, compute the number of
1735 iterations in the peeled loop (the peeling factor) for use in
1736 updating DR_MISALIGNMENT values. The peeling factor is the
1737 vectorization factor minus the misalignment as an element
1738 count. */
1739 mis = DR_MISALIGNMENT (dr0);
1740 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1741 npeel = ((negative ? mis - nelements : nelements - mis)
1742 & (nelements - 1));
1745 /* For interleaved data access every iteration accesses all the
1746 members of the group, therefore we divide the number of iterations
1747 by the group size. */
1748 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1749 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1750 npeel /= GROUP_SIZE (stmt_info);
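          /* E.g. (illustrative), with a group of 4 interleaved accesses and a
             peel amount of 8 elements, only 2 scalar loop iterations need to
             be peeled.  */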
1752 if (vect_print_dump_info (REPORT_DETAILS))
1753 fprintf (vect_dump, "Try peeling by %d", npeel);
1756 /* Ensure that all data refs can be vectorized after the peel. */
1757 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1759 int save_misalignment;
1761 if (dr == dr0)
1762 continue;
1764 stmt = DR_STMT (dr);
1765 stmt_info = vinfo_for_stmt (stmt);
1766 /* For interleaving, only the alignment of the first access
1767 matters. */
1768 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1769 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1770 continue;
1772 save_misalignment = DR_MISALIGNMENT (dr);
1773 vect_update_misalignment_for_peel (dr, dr0, npeel);
1774 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1775 SET_DR_MISALIGNMENT (dr, save_misalignment);
1777 if (!supportable_dr_alignment)
1779 do_peeling = false;
1780 break;
1784 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1786 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1787 if (!stat)
1788 do_peeling = false;
1789 else
1790 return stat;
1793 if (do_peeling)
1795 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1796 If the misalignment of DR_i is identical to that of dr0 then set
1797 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1798 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1799 by the peeling factor times the element size of DR_i (MOD the
1800 vectorization factor times the size). Otherwise, the
1801 misalignment of DR_i must be set to unknown. */
1802 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1803 if (dr != dr0)
1804 vect_update_misalignment_for_peel (dr, dr0, npeel);
1806 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1807 if (npeel)
1808 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1809 else
1810 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1811 SET_DR_MISALIGNMENT (dr0, 0);
1812 if (vect_print_dump_info (REPORT_ALIGNMENT))
1813 fprintf (vect_dump, "Alignment of access forced using peeling.");
1815 if (vect_print_dump_info (REPORT_DETAILS))
1816 fprintf (vect_dump, "Peeling for alignment will be applied.");
1818 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1819 gcc_assert (stat);
1820 return stat;
1825 /* (2) Versioning to force alignment. */
1827 /* Try versioning if:
1828 1) flag_tree_vect_loop_version is TRUE
1829 2) optimize loop for speed
1830 3) there is at least one unsupported misaligned data ref with an unknown
1831 misalignment, and
1832 4) all misaligned data refs with a known misalignment are supported, and
1833 5) the number of runtime alignment checks is within reason. */
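/* Roughly speaking (an assumed sketch, not the exact code emitted), the
   versioned loop is guarded by a runtime test of the questionable
   addresses against LOOP_VINFO_PTR_MASK, e.g.

     if ((((uintptr_t) p | (uintptr_t) q) & mask) == 0)
       vectorized_loop;
     else
       scalar_loop;

   where MASK is the vector size minus one (see its computation below).  */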
1835 do_versioning =
1836 flag_tree_vect_loop_version
1837 && optimize_loop_nest_for_speed_p (loop)
1838 && (!loop->inner); /* FORNOW */
1840 if (do_versioning)
1842 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1844 stmt = DR_STMT (dr);
1845 stmt_info = vinfo_for_stmt (stmt);
1847 /* For interleaving, only the alignment of the first access
1848 matters. */
1849 if (aligned_access_p (dr)
1850 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1851 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1852 continue;
1854 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1856 if (!supportable_dr_alignment)
1858 gimple stmt;
1859 int mask;
1860 tree vectype;
1862 if (known_alignment_for_access_p (dr)
1863 || VEC_length (gimple,
1864 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1865 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1867 do_versioning = false;
1868 break;
1871 stmt = DR_STMT (dr);
1872 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1873 gcc_assert (vectype);
1875 /* The rightmost bits of an aligned address must be zeros.
1876 Construct the mask needed for this test. For example,
1877 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1878 mask must be 15 = 0xf. */
1879 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1881 /* FORNOW: use the same mask to test all potentially unaligned
1882 references in the loop. The vectorizer currently supports
1883 a single vector size, see the reference to
1884 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1885 vectorization factor is computed. */
1886 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1887 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1888 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1889 VEC_safe_push (gimple, heap,
1890 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1891 DR_STMT (dr));
1895 /* Versioning requires at least one misaligned data reference. */
1896 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1897 do_versioning = false;
1898 else if (!do_versioning)
1899 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1902 if (do_versioning)
1904 VEC(gimple,heap) *may_misalign_stmts
1905 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1906 gimple stmt;
1908 /* It can now be assumed that the data references in the statements
1909 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1910 of the loop being vectorized. */
1911 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1913 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1914 dr = STMT_VINFO_DATA_REF (stmt_info);
1915 SET_DR_MISALIGNMENT (dr, 0);
1916 if (vect_print_dump_info (REPORT_ALIGNMENT))
1917 fprintf (vect_dump, "Alignment of access forced using versioning.");
1920 if (vect_print_dump_info (REPORT_DETAILS))
1921 fprintf (vect_dump, "Versioning for alignment will be applied.");
1923 /* Peeling and versioning can't be done together at this time. */
1924 gcc_assert (! (do_peeling && do_versioning));
1926 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1927 gcc_assert (stat);
1928 return stat;
1931 /* This point is reached if neither peeling nor versioning is being done. */
1932 gcc_assert (! (do_peeling || do_versioning));
1934 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1935 return stat;
1939 /* Function vect_find_same_alignment_drs.
1941 Update group and alignment relations according to the chosen
1942 vectorization factor. */
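/* For example (illustration only): with a vectorization factor of 4 and
   4-byte elements, the references A[i] and A[i+4] in the same loop have a
   dependence distance of 4; since 4 % VF == 0 and the element sizes are
   equal, each is recorded in the other's STMT_VINFO_SAME_ALIGN_REFS.  */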
1944 static void
1945 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1946 loop_vec_info loop_vinfo)
1948 unsigned int i;
1949 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1950 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1951 struct data_reference *dra = DDR_A (ddr);
1952 struct data_reference *drb = DDR_B (ddr);
1953 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1954 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1955 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1956 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1957 lambda_vector dist_v;
1958 unsigned int loop_depth;
1960 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1961 return;
1963 if (dra == drb)
1964 return;
1966 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1967 return;
1969 /* Loop-based vectorization and known data dependence. */
1970 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1971 return;
1973 /* Data-dependence analysis reports a distance vector of zero
1974 for data-references that overlap only in the first iteration
1975 but have different sign step (see PR45764).
1976 So as a sanity check require equal DR_STEP. */
1977 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1978 return;
1980 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1981 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1983 int dist = dist_v[loop_depth];
1985 if (vect_print_dump_info (REPORT_DR_DETAILS))
1986 fprintf (vect_dump, "dependence distance = %d.", dist);
1988 /* Same loop iteration. */
1989 if (dist == 0
1990 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1992 /* Two references whose distance is zero, or a multiple of VF with equal element size, have the same alignment. */
1993 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1994 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1995 if (vect_print_dump_info (REPORT_ALIGNMENT))
1996 fprintf (vect_dump, "accesses have the same alignment.");
1997 if (vect_print_dump_info (REPORT_DR_DETAILS))
1999 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
2000 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
2001 fprintf (vect_dump, " and ");
2002 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
2009 /* Function vect_analyze_data_refs_alignment
2011 Analyze the alignment of the data-references in the loop.
2012 Return FALSE if a data reference is found that cannot be vectorized. */
2014 bool
2015 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2016 bb_vec_info bb_vinfo)
2018 if (vect_print_dump_info (REPORT_DETAILS))
2019 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2021 /* Mark groups of data references with same alignment using
2022 data dependence information. */
2023 if (loop_vinfo)
2025 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2026 struct data_dependence_relation *ddr;
2027 unsigned int i;
2029 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2030 vect_find_same_alignment_drs (ddr, loop_vinfo);
2033 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2035 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2036 fprintf (vect_dump,
2037 "not vectorized: can't calculate alignment for data ref.");
2038 return false;
2041 return true;
2045 /* Analyze groups of strided accesses: check that DR belongs to a group of
2046 strided accesses of legal size, step, etc. Detect gaps, single element
2047 interleaving, and other special cases. Set strided access info.
2048 Collect groups of strided stores for further use in SLP analysis. */
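/* As an illustration (an assumed example, not taken from the original
   comment), the stores in

     for (i = 0; i < n; i++)
       {
         a[2*i]   = x[i];
         a[2*i+1] = y[i];
       }

   each have DR_STEP equal to twice the element size, so the group size
   (STRIDE below) is 2 and the two stores form one interleaving group
   with no gaps.  */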
2050 static bool
2051 vect_analyze_group_access (struct data_reference *dr)
2053 tree step = DR_STEP (dr);
2054 tree scalar_type = TREE_TYPE (DR_REF (dr));
2055 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2056 gimple stmt = DR_STMT (dr);
2057 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2058 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2059 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2060 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2061 HOST_WIDE_INT stride, last_accessed_element = 1;
2062 bool slp_impossible = false;
2063 struct loop *loop = NULL;
2065 if (loop_vinfo)
2066 loop = LOOP_VINFO_LOOP (loop_vinfo);
2068 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2069 interleaving group (including gaps). */
2070 stride = dr_step / type_size;
2072 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2073 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2075 /* Check if this DR is part of an interleaving group, and is the single
2076 element of the group that is accessed in the loop. */
2078 /* Gaps are supported only for loads. STEP must be a multiple of the type
2079 size. The size of the group must be a power of 2. */
2080 if (DR_IS_READ (dr)
2081 && (dr_step % type_size) == 0
2082 && stride > 0
2083 && exact_log2 (stride) != -1)
2085 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2086 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2087 if (vect_print_dump_info (REPORT_DR_DETAILS))
2089 fprintf (vect_dump, "Detected single element interleaving ");
2090 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2091 fprintf (vect_dump, " step ");
2092 print_generic_expr (vect_dump, step, TDF_SLIM);
2095 if (loop_vinfo)
2097 if (vect_print_dump_info (REPORT_DETAILS))
2098 fprintf (vect_dump, "Data access with gaps requires scalar "
2099 "epilogue loop");
2100 if (loop->inner)
2102 if (vect_print_dump_info (REPORT_DETAILS))
2103 fprintf (vect_dump, "Peeling for outer loop is not"
2104 " supported");
2105 return false;
2108 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2111 return true;
2114 if (vect_print_dump_info (REPORT_DETAILS))
2116 fprintf (vect_dump, "not consecutive access ");
2117 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2120 if (bb_vinfo)
2122 /* Mark the statement as unvectorizable. */
2123 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2124 return true;
2127 return false;
2130 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2132 /* First stmt in the interleaving chain. Check the chain. */
2133 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2134 struct data_reference *data_ref = dr;
2135 unsigned int count = 1;
2136 tree next_step;
2137 tree prev_init = DR_INIT (data_ref);
2138 gimple prev = stmt;
2139 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2141 while (next)
2143 /* Skip identical data-refs. If two or more stmts share a
2144 data-ref (supported only for loads), we vectorize only the first
2145 stmt, and the rest get their vectorized loads from the first
2146 one. */
2147 if (!tree_int_cst_compare (DR_INIT (data_ref),
2148 DR_INIT (STMT_VINFO_DATA_REF (
2149 vinfo_for_stmt (next)))))
2151 if (DR_IS_WRITE (data_ref))
2153 if (vect_print_dump_info (REPORT_DETAILS))
2154 fprintf (vect_dump, "Two store stmts share the same dr.");
2155 return false;
2158 /* Check that there are no load-store dependences for these loads,
2159 to prevent a load-store-load sequence to the same location. */
2160 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2161 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2163 if (vect_print_dump_info (REPORT_DETAILS))
2164 fprintf (vect_dump,
2165 "READ_WRITE dependence in interleaving.");
2166 return false;
2169 /* For a load, record the earlier stmt whose data-ref it reuses. */
2170 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2172 prev = next;
2173 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2174 continue;
2177 prev = next;
2179 /* Check that all the accesses have the same STEP. */
2180 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2181 if (tree_int_cst_compare (step, next_step))
2183 if (vect_print_dump_info (REPORT_DETAILS))
2184 fprintf (vect_dump, "not consecutive access in interleaving");
2185 return false;
2188 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2189 /* Check that the distance between two accesses is equal to the type
2190 size. Otherwise, we have gaps. */
2191 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2192 - TREE_INT_CST_LOW (prev_init)) / type_size;
2193 if (diff != 1)
2195 /* FORNOW: SLP of accesses with gaps is not supported. */
2196 slp_impossible = true;
2197 if (DR_IS_WRITE (data_ref))
2199 if (vect_print_dump_info (REPORT_DETAILS))
2200 fprintf (vect_dump, "interleaved store with gaps");
2201 return false;
2204 gaps += diff - 1;
2207 last_accessed_element += diff;
2209 /* Store the gap from the previous member of the group. If there is no
2210 gap in the access, GROUP_GAP is always 1. */
2211 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2213 prev_init = DR_INIT (data_ref);
2214 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2215 /* Count the number of data-refs in the chain. */
2216 count++;
2219 /* COUNT is the number of accesses found; multiply it by the size of
2220 the type to get COUNT_IN_BYTES. */
2221 count_in_bytes = type_size * count;
2223 /* Check that the size of the interleaving (including gaps) is not
2224 greater than STEP. */
2225 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2227 if (vect_print_dump_info (REPORT_DETAILS))
2229 fprintf (vect_dump, "interleaving size is greater than step for ");
2230 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2232 return false;
2235 /* Check that the size of the interleaving is equal to STEP for stores,
2236 i.e., that there are no gaps. */
2237 if (dr_step && dr_step != count_in_bytes)
2239 if (DR_IS_READ (dr))
2241 slp_impossible = true;
2242 /* There is a gap after the last load in the group. This gap is a
2243 difference between the stride and the number of elements. When
2244 there is no gap, this difference should be 0. */
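/* For instance (an assumed example): a group consisting of loads of
   A[4*i] and A[4*i+1] has STRIDE == 4 and COUNT == 2, so GROUP_GAP of
   the first stmt is set to 2 - the two elements not read at the end of
   each group.  */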
2245 GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2247 else
2249 if (vect_print_dump_info (REPORT_DETAILS))
2250 fprintf (vect_dump, "interleaved store with gaps");
2251 return false;
2255 /* Check that STEP is a multiple of type size. */
2256 if (dr_step && (dr_step % type_size) != 0)
2258 if (vect_print_dump_info (REPORT_DETAILS))
2260 fprintf (vect_dump, "step is not a multiple of type size: step ");
2261 print_generic_expr (vect_dump, step, TDF_SLIM);
2262 fprintf (vect_dump, " size ");
2263 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2264 TDF_SLIM);
2266 return false;
2269 if (stride == 0)
2270 stride = count;
2272 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2273 if (vect_print_dump_info (REPORT_DETAILS))
2274 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2276 /* SLP: create an SLP data structure for every interleaving group of
2277 stores for further analysis in vect_analyze_slp. */
2278 if (DR_IS_WRITE (dr) && !slp_impossible)
2280 if (loop_vinfo)
2281 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2282 stmt);
2283 if (bb_vinfo)
2284 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2285 stmt);
2288 /* There is a gap at the end of the group. */
2289 if (stride - last_accessed_element > 0 && loop_vinfo)
2291 if (vect_print_dump_info (REPORT_DETAILS))
2292 fprintf (vect_dump, "Data access with gaps requires scalar "
2293 "epilogue loop");
2294 if (loop->inner)
2296 if (vect_print_dump_info (REPORT_DETAILS))
2297 fprintf (vect_dump, "Peeling for outer loop is not supported");
2298 return false;
2301 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2305 return true;
2309 /* Analyze the access pattern of the data-reference DR.
2310 In case of non-consecutive accesses call vect_analyze_group_access() to
2311 analyze groups of strided accesses. */
2313 static bool
2314 vect_analyze_data_ref_access (struct data_reference *dr)
2316 tree step = DR_STEP (dr);
2317 tree scalar_type = TREE_TYPE (DR_REF (dr));
2318 gimple stmt = DR_STMT (dr);
2319 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2320 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2321 struct loop *loop = NULL;
2322 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2324 if (loop_vinfo)
2325 loop = LOOP_VINFO_LOOP (loop_vinfo);
2327 if (loop_vinfo && !step)
2329 if (vect_print_dump_info (REPORT_DETAILS))
2330 fprintf (vect_dump, "bad data-ref access in loop");
2331 return false;
2334 /* Allow invariant loads in loops. */
2335 if (loop_vinfo && dr_step == 0)
2337 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2338 return DR_IS_READ (dr);
2341 if (loop && nested_in_vect_loop_p (loop, stmt))
2343 /* Interleaved accesses are not yet supported within outer-loop
2344 vectorization for references in the inner-loop. */
2345 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2347 /* For the rest of the analysis we use the outer-loop step. */
2348 step = STMT_VINFO_DR_STEP (stmt_info);
2349 dr_step = TREE_INT_CST_LOW (step);
2351 if (dr_step == 0)
2353 if (vect_print_dump_info (REPORT_ALIGNMENT))
2354 fprintf (vect_dump, "zero step in outer loop.");
2355 if (DR_IS_READ (dr))
2356 return true;
2357 else
2358 return false;
2362 /* Consecutive? */
2363 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2364 || (dr_step < 0
2365 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2367 /* Mark that it is not interleaving. */
2368 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2369 return true;
2372 if (loop && nested_in_vect_loop_p (loop, stmt))
2374 if (vect_print_dump_info (REPORT_ALIGNMENT))
2375 fprintf (vect_dump, "strided access in outer loop.");
2376 return false;
2379 /* Non-consecutive access - check whether it is part of an interleaving group. */
2380 return vect_analyze_group_access (dr);
2384 /* Function vect_analyze_data_ref_accesses.
2386 Analyze the access pattern of all the data references in the loop.
2388 FORNOW: the only access pattern that is considered vectorizable is a
2389 simple step 1 (consecutive) access.
2391 FORNOW: handle only arrays and pointer accesses. */
2393 bool
2394 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2396 unsigned int i;
2397 VEC (data_reference_p, heap) *datarefs;
2398 struct data_reference *dr;
2400 if (vect_print_dump_info (REPORT_DETAILS))
2401 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2403 if (loop_vinfo)
2404 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2405 else
2406 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2408 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2409 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2410 && !vect_analyze_data_ref_access (dr))
2412 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2413 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2415 if (bb_vinfo)
2417 /* Mark the statement as not vectorizable. */
2418 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2419 continue;
2421 else
2422 return false;
2425 return true;
2428 /* Function vect_prune_runtime_alias_test_list.
2430 Prune a list of ddrs to be tested at run-time by versioning for alias.
2431 Return FALSE if the resulting list of ddrs is longer than allowed by
2432 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2434 bool
2435 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2437 VEC (ddr_p, heap) * ddrs =
2438 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2439 unsigned i, j;
2441 if (vect_print_dump_info (REPORT_DETAILS))
2442 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2444 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2446 bool found;
2447 ddr_p ddr_i;
2449 ddr_i = VEC_index (ddr_p, ddrs, i);
2450 found = false;
2452 for (j = 0; j < i; j++)
2454 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2456 if (vect_vfa_range_equal (ddr_i, ddr_j))
2458 if (vect_print_dump_info (REPORT_DR_DETAILS))
2460 fprintf (vect_dump, "found equal ranges ");
2461 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2462 fprintf (vect_dump, ", ");
2463 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2464 fprintf (vect_dump, " and ");
2465 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2466 fprintf (vect_dump, ", ");
2467 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2469 found = true;
2470 break;
2474 if (found)
2476 VEC_ordered_remove (ddr_p, ddrs, i);
2477 continue;
2479 i++;
2482 if (VEC_length (ddr_p, ddrs) >
2483 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2485 if (vect_print_dump_info (REPORT_DR_DETAILS))
2487 fprintf (vect_dump,
2488 "disable versioning for alias - max number of generated "
2489 "checks exceeded.");
2492 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2494 return false;
2497 return true;
2500 /* Check whether a non-affine read in stmt is suitable for gather load
2501 and if so, return a builtin decl for that operation. */
2503 tree
2504 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2505 tree *offp, int *scalep)
2507 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2508 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2509 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2510 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2511 tree offtype = NULL_TREE;
2512 tree decl, base, off;
2513 enum machine_mode pmode;
2514 int punsignedp, pvolatilep;
2516 /* The gather builtins need address of the form
2517 loop_invariant + vector * {1, 2, 4, 8}
2519 or loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2520 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2521 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2522 multiplications and additions in it. To get a vector, we need
2523 a single SSA_NAME that will be defined in the loop and will
2524 contain everything that is not loop invariant and that can be
2525 vectorized. The following code attempts to find such a preexisting
2526 SSA_NAME OFF and put the loop invariants into a tree BASE
2527 that can be gimplified before the loop. */
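/* A hypothetical example (not part of the original comment): for a read
   of p[idx[i]] inside the loop, with P loop invariant, the decomposition
   below yields BASE == p (plus any constant offset), OFF == the SSA_NAME
   holding idx[i], and SCALE == the element size in bytes, matching the
   loop_invariant + vector * scale form described above.  */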
2528 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2529 &pmode, &punsignedp, &pvolatilep, false);
2530 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2532 if (TREE_CODE (base) == MEM_REF)
2534 if (!integer_zerop (TREE_OPERAND (base, 1)))
2536 if (off == NULL_TREE)
2538 double_int moff = mem_ref_offset (base);
2539 off = double_int_to_tree (sizetype, moff);
2541 else
2542 off = size_binop (PLUS_EXPR, off,
2543 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2545 base = TREE_OPERAND (base, 0);
2547 else
2548 base = build_fold_addr_expr (base);
2550 if (off == NULL_TREE)
2551 off = size_zero_node;
2553 /* If base is not loop invariant, then if off is 0 we start with just
2554 the constant offset in the loop invariant BASE and continue with base
2555 as OFF; otherwise we give up.
2556 We could handle that case by gimplifying the addition of base + off
2557 into some SSA_NAME and use that as off, but for now punt. */
2558 if (!expr_invariant_in_loop_p (loop, base))
2560 if (!integer_zerop (off))
2561 return NULL_TREE;
2562 off = base;
2563 base = size_int (pbitpos / BITS_PER_UNIT);
2565 /* Otherwise put base + constant offset into the loop invariant BASE
2566 and continue with OFF. */
2567 else
2569 base = fold_convert (sizetype, base);
2570 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2573 /* OFF at this point may be either an SSA_NAME or some tree expression
2574 from get_inner_reference. Try to peel off loop invariants from it
2575 into BASE as long as possible. */
2576 STRIP_NOPS (off);
2577 while (offtype == NULL_TREE)
2579 enum tree_code code;
2580 tree op0, op1, add = NULL_TREE;
2582 if (TREE_CODE (off) == SSA_NAME)
2584 gimple def_stmt = SSA_NAME_DEF_STMT (off);
2586 if (expr_invariant_in_loop_p (loop, off))
2587 return NULL_TREE;
2589 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2590 break;
2592 op0 = gimple_assign_rhs1 (def_stmt);
2593 code = gimple_assign_rhs_code (def_stmt);
2594 op1 = gimple_assign_rhs2 (def_stmt);
2596 else
2598 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2599 return NULL_TREE;
2600 code = TREE_CODE (off);
2601 extract_ops_from_tree (off, &code, &op0, &op1);
2603 switch (code)
2605 case POINTER_PLUS_EXPR:
2606 case PLUS_EXPR:
2607 if (expr_invariant_in_loop_p (loop, op0))
2609 add = op0;
2610 off = op1;
2611 do_add:
2612 add = fold_convert (sizetype, add);
2613 if (scale != 1)
2614 add = size_binop (MULT_EXPR, add, size_int (scale));
2615 base = size_binop (PLUS_EXPR, base, add);
2616 continue;
2618 if (expr_invariant_in_loop_p (loop, op1))
2620 add = op1;
2621 off = op0;
2622 goto do_add;
2624 break;
2625 case MINUS_EXPR:
2626 if (expr_invariant_in_loop_p (loop, op1))
2628 add = fold_convert (sizetype, op1);
2629 add = size_binop (MINUS_EXPR, size_zero_node, add);
2630 off = op0;
2631 goto do_add;
2633 break;
2634 case MULT_EXPR:
2635 if (scale == 1 && host_integerp (op1, 0))
2637 scale = tree_low_cst (op1, 0);
2638 off = op0;
2639 continue;
2641 break;
2642 case SSA_NAME:
2643 off = op0;
2644 continue;
2645 CASE_CONVERT:
2646 if (!POINTER_TYPE_P (TREE_TYPE (op0))
2647 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2648 break;
2649 if (TYPE_PRECISION (TREE_TYPE (op0))
2650 == TYPE_PRECISION (TREE_TYPE (off)))
2652 off = op0;
2653 continue;
2655 if (TYPE_PRECISION (TREE_TYPE (op0))
2656 < TYPE_PRECISION (TREE_TYPE (off)))
2658 off = op0;
2659 offtype = TREE_TYPE (off);
2660 STRIP_NOPS (off);
2661 continue;
2663 break;
2664 default:
2665 break;
2667 break;
2670 /* If at the end OFF still isn't a SSA_NAME or isn't
2671 defined in the loop, punt. */
2672 if (TREE_CODE (off) != SSA_NAME
2673 || expr_invariant_in_loop_p (loop, off))
2674 return NULL_TREE;
2676 if (offtype == NULL_TREE)
2677 offtype = TREE_TYPE (off);
2679 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2680 offtype, scale);
2681 if (decl == NULL_TREE)
2682 return NULL_TREE;
2684 if (basep)
2685 *basep = base;
2686 if (offp)
2687 *offp = off;
2688 if (scalep)
2689 *scalep = scale;
2690 return decl;
2694 /* Function vect_analyze_data_refs.
2696 Find all the data references in the loop or basic block.
2698 The general structure of the analysis of data refs in the vectorizer is as
2699 follows:
2700 1- vect_analyze_data_refs(loop/bb): call
2701 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2702 in the loop/bb and their dependences.
2703 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2704 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2705 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2709 bool
2710 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2711 bb_vec_info bb_vinfo,
2712 int *min_vf)
2714 struct loop *loop = NULL;
2715 basic_block bb = NULL;
2716 unsigned int i;
2717 VEC (data_reference_p, heap) *datarefs;
2718 struct data_reference *dr;
2719 tree scalar_type;
2720 bool res, stop_bb_analysis = false;
2722 if (vect_print_dump_info (REPORT_DETAILS))
2723 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2725 if (loop_vinfo)
2727 loop = LOOP_VINFO_LOOP (loop_vinfo);
2728 res = compute_data_dependences_for_loop
2729 (loop, true,
2730 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2731 &LOOP_VINFO_DATAREFS (loop_vinfo),
2732 &LOOP_VINFO_DDRS (loop_vinfo));
2734 if (!res)
2736 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2737 fprintf (vect_dump, "not vectorized: loop contains function calls"
2738 " or data references that cannot be analyzed");
2739 return false;
2742 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2744 else
2746 bb = BB_VINFO_BB (bb_vinfo);
2747 res = compute_data_dependences_for_bb (bb, true,
2748 &BB_VINFO_DATAREFS (bb_vinfo),
2749 &BB_VINFO_DDRS (bb_vinfo));
2750 if (!res)
2752 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2753 fprintf (vect_dump, "not vectorized: basic block contains function"
2754 " calls or data references that cannot be analyzed");
2755 return false;
2758 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2761 /* Go through the data-refs, check that the analysis succeeded. Update
2762 pointer from stmt_vec_info struct to DR and vectype. */
2764 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2766 gimple stmt;
2767 stmt_vec_info stmt_info;
2768 tree base, offset, init;
2769 bool gather = false;
2770 int vf;
2772 if (!dr || !DR_REF (dr))
2774 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2775 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2777 return false;
2780 stmt = DR_STMT (dr);
2781 stmt_info = vinfo_for_stmt (stmt);
2783 if (stop_bb_analysis)
2785 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2786 continue;
2789 /* Check that analysis of the data-ref succeeded. */
2790 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2791 || !DR_STEP (dr))
2793 /* If the target supports vector gather loads, see if one can
2794 be used instead. */
2795 if (loop_vinfo
2796 && DR_IS_READ (dr)
2797 && !TREE_THIS_VOLATILE (DR_REF (dr))
2798 && targetm.vectorize.builtin_gather != NULL
2799 && !nested_in_vect_loop_p (loop, stmt))
2801 struct data_reference *newdr
2802 = create_data_ref (NULL, loop_containing_stmt (stmt),
2803 DR_REF (dr), stmt, true);
2804 gcc_assert (newdr != NULL && DR_REF (newdr));
2805 if (DR_BASE_ADDRESS (newdr)
2806 && DR_OFFSET (newdr)
2807 && DR_INIT (newdr)
2808 && DR_STEP (newdr)
2809 && integer_zerop (DR_STEP (newdr)))
2811 dr = newdr;
2812 gather = true;
2814 else
2815 free_data_ref (newdr);
2818 if (!gather)
2820 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2822 fprintf (vect_dump, "not vectorized: data ref analysis "
2823 "failed ");
2824 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2827 if (bb_vinfo)
2829 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2830 stop_bb_analysis = true;
2831 continue;
2834 return false;
2838 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2840 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2841 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2842 "constant");
2844 if (bb_vinfo)
2846 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2847 stop_bb_analysis = true;
2848 continue;
2851 if (gather)
2852 free_data_ref (dr);
2853 return false;
2856 if (TREE_THIS_VOLATILE (DR_REF (dr)))
2858 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2860 fprintf (vect_dump, "not vectorized: volatile type ");
2861 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2864 if (bb_vinfo)
2866 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2867 stop_bb_analysis = true;
2868 continue;
2871 return false;
2874 base = unshare_expr (DR_BASE_ADDRESS (dr));
2875 offset = unshare_expr (DR_OFFSET (dr));
2876 init = unshare_expr (DR_INIT (dr));
2878 if (stmt_can_throw_internal (stmt))
2880 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2882 fprintf (vect_dump, "not vectorized: statement can throw an "
2883 "exception ");
2884 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2887 if (bb_vinfo)
2889 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2890 stop_bb_analysis = true;
2891 continue;
2894 if (gather)
2895 free_data_ref (dr);
2896 return false;
2899 /* Update DR field in stmt_vec_info struct. */
2901 /* If the dataref is in an inner-loop of the loop that is considered
2902 for vectorization, we also want to analyze the access relative to
2903 the outer-loop (DR contains information only relative to the
2904 inner-most enclosing loop). We do that by building a reference to the
2905 first location accessed by the inner-loop, and analyze it relative to
2906 the outer-loop. */
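/* For example (illustration only): when vectorizing the outer I-loop of

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         s += in[i + j];

   DR describes in[i+j] relative to the inner J-loop only; the code below
   additionally records the base, offset, init and step of the access
   relative to the I-loop in the STMT_VINFO_DR_* fields.  */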
2907 if (loop && nested_in_vect_loop_p (loop, stmt))
2909 tree outer_step, outer_base, outer_init;
2910 HOST_WIDE_INT pbitsize, pbitpos;
2911 tree poffset;
2912 enum machine_mode pmode;
2913 int punsignedp, pvolatilep;
2914 affine_iv base_iv, offset_iv;
2915 tree dinit;
2917 /* Build a reference to the first location accessed by the
2918 inner-loop: *(BASE+INIT). (The first location is actually
2919 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2920 tree inner_base = build_fold_indirect_ref
2921 (fold_build_pointer_plus (base, init));
2923 if (vect_print_dump_info (REPORT_DETAILS))
2925 fprintf (vect_dump, "analyze in outer-loop: ");
2926 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2929 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2930 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2931 gcc_assert (outer_base != NULL_TREE);
2933 if (pbitpos % BITS_PER_UNIT != 0)
2935 if (vect_print_dump_info (REPORT_DETAILS))
2936 fprintf (vect_dump, "failed: bit offset alignment.\n");
2937 return false;
2940 outer_base = build_fold_addr_expr (outer_base);
2941 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2942 &base_iv, false))
2944 if (vect_print_dump_info (REPORT_DETAILS))
2945 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2946 return false;
2949 if (offset)
2951 if (poffset)
2952 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2953 poffset);
2954 else
2955 poffset = offset;
2958 if (!poffset)
2960 offset_iv.base = ssize_int (0);
2961 offset_iv.step = ssize_int (0);
2963 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2964 &offset_iv, false))
2966 if (vect_print_dump_info (REPORT_DETAILS))
2967 fprintf (vect_dump, "evolution of offset is not affine.\n");
2968 return false;
2971 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2972 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2973 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2974 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2975 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2977 outer_step = size_binop (PLUS_EXPR,
2978 fold_convert (ssizetype, base_iv.step),
2979 fold_convert (ssizetype, offset_iv.step));
2981 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2982 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2983 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2984 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2985 STMT_VINFO_DR_OFFSET (stmt_info) =
2986 fold_convert (ssizetype, offset_iv.base);
2987 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2988 size_int (highest_pow2_factor (offset_iv.base));
2990 if (vect_print_dump_info (REPORT_DETAILS))
2992 fprintf (vect_dump, "\touter base_address: ");
2993 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2994 fprintf (vect_dump, "\n\touter offset from base address: ");
2995 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2996 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2997 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2998 fprintf (vect_dump, "\n\touter step: ");
2999 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3000 fprintf (vect_dump, "\n\touter aligned to: ");
3001 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3005 if (STMT_VINFO_DATA_REF (stmt_info))
3007 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3009 fprintf (vect_dump,
3010 "not vectorized: more than one data ref in stmt: ");
3011 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3014 if (bb_vinfo)
3016 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3017 stop_bb_analysis = true;
3018 continue;
3021 if (gather)
3022 free_data_ref (dr);
3023 return false;
3026 STMT_VINFO_DATA_REF (stmt_info) = dr;
3028 /* Set vectype for STMT. */
3029 scalar_type = TREE_TYPE (DR_REF (dr));
3030 STMT_VINFO_VECTYPE (stmt_info) =
3031 get_vectype_for_scalar_type (scalar_type);
3032 if (!STMT_VINFO_VECTYPE (stmt_info))
3034 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3036 fprintf (vect_dump,
3037 "not vectorized: no vectype for stmt: ");
3038 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3039 fprintf (vect_dump, " scalar_type: ");
3040 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3043 if (bb_vinfo)
3045 /* Mark the statement as not vectorizable. */
3046 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3047 stop_bb_analysis = true;
3048 continue;
3051 if (gather)
3053 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3054 free_data_ref (dr);
3056 return false;
3059 /* Adjust the minimal vectorization factor according to the
3060 vector type. */
3061 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3062 if (vf > *min_vf)
3063 *min_vf = vf;
3065 if (gather)
3067 unsigned int j, k, n;
3068 struct data_reference *olddr
3069 = VEC_index (data_reference_p, datarefs, i);
3070 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3071 struct data_dependence_relation *ddr, *newddr;
3072 bool bad = false;
3073 tree off;
3074 VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3076 if (!vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL)
3077 || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3079 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3081 fprintf (vect_dump,
3082 "not vectorized: not suitable for gather ");
3083 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3085 return false;
3088 n = VEC_length (data_reference_p, datarefs) - 1;
3089 for (j = 0, k = i - 1; j < i; j++)
3091 ddr = VEC_index (ddr_p, ddrs, k);
3092 gcc_assert (DDR_B (ddr) == olddr);
3093 newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3094 nest);
3095 VEC_replace (ddr_p, ddrs, k, newddr);
3096 free_dependence_relation (ddr);
3097 if (!bad
3098 && DR_IS_WRITE (DDR_A (newddr))
3099 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3100 bad = true;
3101 k += --n;
3104 k++;
3105 n = k + VEC_length (data_reference_p, datarefs) - i - 1;
3106 for (; k < n; k++)
3108 ddr = VEC_index (ddr_p, ddrs, k);
3109 gcc_assert (DDR_A (ddr) == olddr);
3110 newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3111 nest);
3112 VEC_replace (ddr_p, ddrs, k, newddr);
3113 free_dependence_relation (ddr);
3114 if (!bad
3115 && DR_IS_WRITE (DDR_B (newddr))
3116 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3117 bad = true;
3120 k = VEC_length (ddr_p, ddrs)
3121 - VEC_length (data_reference_p, datarefs) + i;
3122 ddr = VEC_index (ddr_p, ddrs, k);
3123 gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3124 newddr = initialize_data_dependence_relation (dr, dr, nest);
3125 VEC_replace (ddr_p, ddrs, k, newddr);
3126 free_dependence_relation (ddr);
3127 VEC_replace (data_reference_p, datarefs, i, dr);
3129 if (bad)
3131 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3133 fprintf (vect_dump,
3134 "not vectorized: data dependence conflict"
3135 " prevents gather");
3136 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3138 return false;
3141 STMT_VINFO_GATHER_P (stmt_info) = true;
3145 return true;
3149 /* Function vect_get_new_vect_var.
3151 Returns a new temporary variable. The current naming scheme prefixes
3152 the names of vectorizer-generated variables with "vect_", "stmp_" or
3153 "vect_p" (depending on the value of VAR_KIND), followed by NAME if
3154 provided. */
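/* For instance (an illustrative example, not in the original comment),
   vect_get_new_vect_var (ptr_type, vect_pointer_var, "foo") creates a
   temporary whose name starts with "vect_pfoo", while a NULL NAME yields
   just the prefix.  */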
3156 tree
3157 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3159 const char *prefix;
3160 tree new_vect_var;
3162 switch (var_kind)
3164 case vect_simple_var:
3165 prefix = "vect_";
3166 break;
3167 case vect_scalar_var:
3168 prefix = "stmp_";
3169 break;
3170 case vect_pointer_var:
3171 prefix = "vect_p";
3172 break;
3173 default:
3174 gcc_unreachable ();
3177 if (name)
3179 char* tmp = concat (prefix, name, NULL);
3180 new_vect_var = create_tmp_var (type, tmp);
3181 free (tmp);
3183 else
3184 new_vect_var = create_tmp_var (type, prefix);
3186 /* Mark vector typed variable as a gimple register variable. */
3187 if (TREE_CODE (type) == VECTOR_TYPE)
3188 DECL_GIMPLE_REG_P (new_vect_var) = true;
3190 return new_vect_var;
3194 /* Function vect_create_addr_base_for_vector_ref.
3196 Create an expression that computes the address of the first memory location
3197 that will be accessed for a data reference.
3199 Input:
3200 STMT: The statement containing the data reference.
3201 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3202 OFFSET: Optional. If supplied, it is added to the initial address.
3203 LOOP: Specify the loop-nest relative to which the address should be computed.
3204 For example, when the dataref is in an inner-loop nested in an
3205 outer-loop that is now being vectorized, LOOP can be either the
3206 outer-loop, or the inner-loop. The first memory location accessed
3207 by the following dataref ('in' points to short):
3209 for (i=0; i<N; i++)
3210 for (j=0; j<M; j++)
3211 s += in[i+j]
3213 is as follows:
3214 if LOOP=i_loop: &in (relative to i_loop)
3215 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3217 Output:
3218 1. Return an SSA_NAME whose value is the address of the memory location of
3219 the first vector of the data reference.
3220 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3221 these statement(s) which define the returned SSA_NAME.
3223 FORNOW: We are only handling array accesses with step 1. */
3225 tree
3226 vect_create_addr_base_for_vector_ref (gimple stmt,
3227 gimple_seq *new_stmt_list,
3228 tree offset,
3229 struct loop *loop)
3231 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3232 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3233 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3234 tree base_name;
3235 tree data_ref_base_var;
3236 tree vec_stmt;
3237 tree addr_base, addr_expr;
3238 tree dest;
3239 gimple_seq seq = NULL;
3240 tree base_offset = unshare_expr (DR_OFFSET (dr));
3241 tree init = unshare_expr (DR_INIT (dr));
3242 tree vect_ptr_type;
3243 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3244 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3245 tree base;
3247 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3249 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3251 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3253 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3254 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3255 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3258 if (loop_vinfo)
3259 base_name = build_fold_indirect_ref (data_ref_base);
3260 else
3262 base_offset = ssize_int (0);
3263 init = ssize_int (0);
3264 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
3267 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3268 add_referenced_var (data_ref_base_var);
3269 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3270 data_ref_base_var);
3271 gimple_seq_add_seq (new_stmt_list, seq);
3273 /* Create base_offset */
3274 base_offset = size_binop (PLUS_EXPR,
3275 fold_convert (sizetype, base_offset),
3276 fold_convert (sizetype, init));
3277 dest = create_tmp_var (sizetype, "base_off");
3278 add_referenced_var (dest);
3279 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3280 gimple_seq_add_seq (new_stmt_list, seq);
3282 if (offset)
3284 tree tmp = create_tmp_var (sizetype, "offset");
3286 add_referenced_var (tmp);
3287 offset = fold_build2 (MULT_EXPR, sizetype,
3288 fold_convert (sizetype, offset), step);
3289 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3290 base_offset, offset);
3291 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3292 gimple_seq_add_seq (new_stmt_list, seq);
3295 /* base + base_offset */
3296 if (loop_vinfo)
3297 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3298 else
3300 addr_base = build1 (ADDR_EXPR,
3301 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3302 unshare_expr (DR_REF (dr)));
3305 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3306 base = get_base_address (DR_REF (dr));
3307 if (base
3308 && TREE_CODE (base) == MEM_REF)
3309 vect_ptr_type
3310 = build_qualified_type (vect_ptr_type,
3311 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3313 vec_stmt = fold_convert (vect_ptr_type, addr_base);
3314 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3315 get_name (base_name));
3316 add_referenced_var (addr_expr);
3317 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3318 gimple_seq_add_seq (new_stmt_list, seq);
3320 if (DR_PTR_INFO (dr)
3321 && TREE_CODE (vec_stmt) == SSA_NAME)
3323 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3324 if (offset)
3326 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
3327 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
3331 if (vect_print_dump_info (REPORT_DETAILS))
3333 fprintf (vect_dump, "created ");
3334 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
3337 return vec_stmt;
3341 /* Function vect_create_data_ref_ptr.
3343 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3344 location accessed in the loop by STMT, along with the def-use update
3345 chain to appropriately advance the pointer through the loop iterations.
3346 Also set aliasing information for the pointer. This pointer is used by
3347 the callers to this function to create a memory reference expression for
3348 vector load/store access.
3350 Input:
3351 1. STMT: a stmt that references memory. Expected to be of the form
3352 GIMPLE_ASSIGN <name, data-ref> or
3353 GIMPLE_ASSIGN <data-ref, name>.
3354 2. AGGR_TYPE: the type of the reference, which should be either a vector
3355 or an array.
3356 3. AT_LOOP: the loop where the vector memref is to be created.
3357 4. OFFSET (optional): an offset to be added to the initial address accessed
3358 by the data-ref in STMT.
3359 5. BSI: location where the new stmts are to be placed if there is no loop
3360 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3361 pointing to the initial address.
3363 Output:
3364 1. Declare a new ptr to vector_type, and have it point to the base of the
3365 data reference (the initial address accessed by the data reference).
3366 For example, for vector of type V8HI, the following code is generated:
3368 v8hi *ap;
3369 ap = (v8hi *)initial_address;
3371 if OFFSET is not supplied:
3372 initial_address = &a[init];
3373 if OFFSET is supplied:
3374 initial_address = &a[init + OFFSET];
3376 Return the initial_address in INITIAL_ADDRESS.
3378 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3379 update the pointer in each iteration of the loop.
3381 Return the increment stmt that updates the pointer in PTR_INCR.
3383 3. Set INV_P to true if the access pattern of the data reference in the
3384 vectorized loop is invariant. Set it to false otherwise.
3386 4. Return the pointer. */
3388 tree
3389 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3390 tree offset, tree *initial_address,
3391 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3392 bool only_init, bool *inv_p)
3394 tree base_name;
3395 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3396 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3397 struct loop *loop = NULL;
3398 bool nested_in_vect_loop = false;
3399 struct loop *containing_loop = NULL;
3400 tree aggr_ptr_type;
3401 tree aggr_ptr;
3402 tree new_temp;
3403 gimple vec_stmt;
3404 gimple_seq new_stmt_list = NULL;
3405 edge pe = NULL;
3406 basic_block new_bb;
3407 tree aggr_ptr_init;
3408 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3409 tree aptr;
3410 gimple_stmt_iterator incr_gsi;
3411 bool insert_after;
3412 bool negative;
3413 tree indx_before_incr, indx_after_incr;
3414 gimple incr;
3415 tree step;
3416 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3417 tree base;
3419 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3420 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3422 if (loop_vinfo)
3424 loop = LOOP_VINFO_LOOP (loop_vinfo);
3425 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3426 containing_loop = (gimple_bb (stmt))->loop_father;
3427 pe = loop_preheader_edge (loop);
3429 else
3431 gcc_assert (bb_vinfo);
3432 only_init = true;
3433 *ptr_incr = NULL;
3436 /* Check the step (evolution) of the load in LOOP, and record
3437 whether it's invariant. */
3438 if (nested_in_vect_loop)
3439 step = STMT_VINFO_DR_STEP (stmt_info);
3440 else
3441 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3443 if (tree_int_cst_compare (step, size_zero_node) == 0)
3444 *inv_p = true;
3445 else
3446 *inv_p = false;
3447 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3449 /* Create an expression for the first address accessed by this load
3450 in LOOP. */
3451 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3453 if (vect_print_dump_info (REPORT_DETAILS))
3455 tree data_ref_base = base_name;
3456 fprintf (vect_dump, "create %s-pointer variable to type: ",
3457 tree_code_name[(int) TREE_CODE (aggr_type)]);
3458 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3459 if (TREE_CODE (data_ref_base) == VAR_DECL
3460 || TREE_CODE (data_ref_base) == ARRAY_REF)
3461 fprintf (vect_dump, " vectorizing an array ref: ");
3462 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3463 fprintf (vect_dump, " vectorizing a record based array ref: ");
3464 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3465 fprintf (vect_dump, " vectorizing a pointer ref: ");
3466 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3469 /* (1) Create the new aggregate-pointer variable. */
3470 aggr_ptr_type = build_pointer_type (aggr_type);
3471 base = get_base_address (DR_REF (dr));
3472 if (base
3473 && TREE_CODE (base) == MEM_REF)
3474 aggr_ptr_type
3475 = build_qualified_type (aggr_ptr_type,
3476 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3477 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3478 get_name (base_name));
3480 /* Vector and array types inherit the alias set of their component
3481 type by default so we need to use a ref-all pointer if the data
3482 reference does not conflict with the created aggregated data
3483 reference because it is not addressable. */
3484 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3485 get_alias_set (DR_REF (dr))))
3487 aggr_ptr_type
3488 = build_pointer_type_for_mode (aggr_type,
3489 TYPE_MODE (aggr_ptr_type), true);
3490 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3491 get_name (base_name));
3494 /* Likewise for any of the data references in the stmt group. */
3495 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3497 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3500 tree lhs = gimple_assign_lhs (orig_stmt);
3501 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3502 get_alias_set (lhs)))
3504 aggr_ptr_type
3505 = build_pointer_type_for_mode (aggr_type,
3506 TYPE_MODE (aggr_ptr_type), true);
3507 aggr_ptr
3508 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3509 get_name (base_name));
3510 break;
3513 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3515 while (orig_stmt);
3518 add_referenced_var (aggr_ptr);
3520 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3521 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3522 def-use update cycles for the pointer: one relative to the outer-loop
3523 (LOOP), which is what steps (3) and (4) below do. The other is relative
3524 to the inner-loop (which is the inner-most loop containing the dataref),
3525 and this is done by step (5) below.
3527 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3528 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3529 redundant. Steps (3),(4) create the following:
3531 vp0 = &base_addr;
3532 LOOP: vp1 = phi(vp0,vp2)
3535 vp2 = vp1 + step
3536 goto LOOP
3538 If there is an inner-loop nested in loop, then step (5) will also be
3539 applied, and an additional update in the inner-loop will be created:
3541 vp0 = &base_addr;
3542 LOOP: vp1 = phi(vp0,vp2)
3544 inner: vp3 = phi(vp1,vp4)
3545 vp4 = vp3 + inner_step
3546 if () goto inner
3548 vp2 = vp1 + step
3549 if () goto LOOP */
3551 /* (2) Calculate the initial address of the aggregate-pointer, and set
3552 the aggregate-pointer to point to it before the loop. */
3554 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3556 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3557 offset, loop);
3558 if (new_stmt_list)
3560 if (pe)
3562 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3563 gcc_assert (!new_bb);
3565 else
3566 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3569 *initial_address = new_temp;
3571 /* Create: p = (aggr_type *) initial_base */
3572 if (TREE_CODE (new_temp) != SSA_NAME
3573 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3575 vec_stmt = gimple_build_assign (aggr_ptr,
3576 fold_convert (aggr_ptr_type, new_temp));
3577 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3578 /* Copy the points-to information if it exists. */
3579 if (DR_PTR_INFO (dr))
3580 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3581 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3582 if (pe)
3584 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3585 gcc_assert (!new_bb);
3587 else
3588 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3590 else
3591 aggr_ptr_init = new_temp;
3593 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3594 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3595 inner-loop nested in LOOP (during outer-loop vectorization). */
3597 /* No update in loop is required. */
3598 if (only_init && (!loop_vinfo || at_loop == loop))
3599 aptr = aggr_ptr_init;
3600 else
3602 /* The step of the aggregate pointer is the type size. */
3603 tree step = TYPE_SIZE_UNIT (aggr_type);
3604 /* One exception to the above is when the scalar step of the load in
3605 LOOP is zero. In this case the step here is also zero. */
3606 if (*inv_p)
3607 step = size_zero_node;
3608 else if (negative)
3609 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3611 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3613 create_iv (aggr_ptr_init,
3614 fold_convert (aggr_ptr_type, step),
3615 aggr_ptr, loop, &incr_gsi, insert_after,
3616 &indx_before_incr, &indx_after_incr);
3617 incr = gsi_stmt (incr_gsi);
3618 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3620 /* Copy the points-to information if it exists. */
3621 if (DR_PTR_INFO (dr))
3623 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3624 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3626 if (ptr_incr)
3627 *ptr_incr = incr;
3629 aptr = indx_before_incr;
3632 if (!nested_in_vect_loop || only_init)
3633 return aptr;
3636 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3637 nested in LOOP, if it exists. */
3639 gcc_assert (nested_in_vect_loop);
3640 if (!only_init)
3642 standard_iv_increment_position (containing_loop, &incr_gsi,
3643 &insert_after);
3644 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3645 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3646 &indx_after_incr);
3647 incr = gsi_stmt (incr_gsi);
3648 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3650 /* Copy the points-to information if it exists. */
3651 if (DR_PTR_INFO (dr))
3653 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3654 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3656 if (ptr_incr)
3657 *ptr_incr = incr;
3659 return indx_before_incr;
3661 else
3662 gcc_unreachable ();
3666 /* Function bump_vector_ptr
3668 Increment a pointer (to a vector type) by vector-size. If requested,
3669 i.e. if PTR_INCR is given, then also connect the new increment stmt
3670 to the existing def-use update-chain of the pointer, by modifying
3671 the PTR_INCR as illustrated below:
3673 The pointer def-use update-chain before this function:
3674 DATAREF_PTR = phi (p_0, p_2)
3675 ....
3676 PTR_INCR: p_2 = DATAREF_PTR + step
3678 The pointer def-use update-chain after this function:
3679 DATAREF_PTR = phi (p_0, p_2)
3680 ....
3681 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3682 ....
3683 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3685 Input:
3686 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3687 in the loop.
3688 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3689 the loop. The increment amount across iterations is expected
3690 to be vector_size.
3691 GSI - location where the new update stmt is to be placed.
3692 STMT - the original scalar memory-access stmt that is being vectorized.
3693 BUMP - optional. The offset by which to bump the pointer. If not given,
3694 the offset is assumed to be vector_size.
3696 Output: Return NEW_DATAREF_PTR as illustrated above.
3700 tree
3701 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3702 gimple stmt, tree bump)
3704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3705 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3706 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3707 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3708 tree update = TYPE_SIZE_UNIT (vectype);
3709 gimple incr_stmt;
3710 ssa_op_iter iter;
3711 use_operand_p use_p;
3712 tree new_dataref_ptr;
3714 if (bump)
3715 update = bump;
3717 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3718 dataref_ptr, update);
3719 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3720 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3721 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3723 /* Copy the points-to information if it exists. */
3724 if (DR_PTR_INFO (dr))
3726 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3727 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3728 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3731 if (!ptr_incr)
3732 return new_dataref_ptr;
3734 /* Update the vector-pointer's cross-iteration increment. */
3735 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3737 tree use = USE_FROM_PTR (use_p);
3739 if (use == dataref_ptr)
3740 SET_USE (use_p, new_dataref_ptr);
3741 else
3742 gcc_assert (tree_int_cst_compare (use, update) == 0);
3745 return new_dataref_ptr;
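/* Illustrative only, not part of GCC: a standalone scalar sketch of the
   pointer re-chaining shown in the comment above -- each iteration first
   uses the current pointer, then a bumped copy, and the cross-iteration
   increment is applied to the bumped copy.  All names (walk_with_bump,
   step, bump) are invented for the sketch, and it is wrapped in #if 0 so
   it has no effect on this translation unit.  */
#if 0
#include <stdio.h>

static void
walk_with_bump (const int *base, int n, int step, int bump)
{
  const int *p = base;               /* DATAREF_PTR = phi (p_0, p_2)            */
  for (int i = 0; i < n; i++)
    {
      printf ("%d ", *p);            /* use of DATAREF_PTR                      */
      const int *new_p = p + bump;   /* NEW_DATAREF_PTR = DATAREF_PTR + BUMP    */
      printf ("%d ", *new_p);        /* use of NEW_DATAREF_PTR                  */
      p = new_p + step;              /* PTR_INCR: p_2 = NEW_DATAREF_PTR + step  */
    }
  printf ("\n");
}

int
main (void)
{
  int a[32];
  for (int i = 0; i < 32; i++)
    a[i] = i;
  walk_with_bump (a, 4, 3, 1);       /* prints "0 1 4 5 8 9 12 13"              */
  return 0;
}
#endif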
3749 /* Function vect_create_destination_var.
3751 Create a new temporary of type VECTYPE. */
3753 tree
3754 vect_create_destination_var (tree scalar_dest, tree vectype)
3756 tree vec_dest;
3757 const char *new_name;
3758 tree type;
3759 enum vect_var_kind kind;
3761 kind = vectype ? vect_simple_var : vect_scalar_var;
3762 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3764 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3766 new_name = get_name (scalar_dest);
3767 if (!new_name)
3768 new_name = "var_";
3769 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3770 add_referenced_var (vec_dest);
3772 return vec_dest;
3775 /* Function vect_strided_store_supported.
3777 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3778 and FALSE otherwise. */
3780 bool
3781 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3783 optab ih_optab, il_optab;
3784 enum machine_mode mode;
3786 mode = TYPE_MODE (vectype);
3788 /* vect_permute_store_chain requires the group size to be a power of two. */
3789 if (exact_log2 (count) == -1)
3791 if (vect_print_dump_info (REPORT_DETAILS))
3792 fprintf (vect_dump, "the size of the group of strided accesses"
3793 " is not a power of 2");
3794 return false;
3797 /* Check that the operation is supported. */
3798 ih_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3799 vectype, optab_default);
3800 il_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3801 vectype, optab_default);
3802 if (il_optab && ih_optab
3803 && optab_handler (ih_optab, mode) != CODE_FOR_nothing
3804 && optab_handler (il_optab, mode) != CODE_FOR_nothing)
3805 return true;
3807 if (can_vec_perm_for_code_p (VEC_INTERLEAVE_HIGH_EXPR, mode, NULL)
3808 && can_vec_perm_for_code_p (VEC_INTERLEAVE_LOW_EXPR, mode, NULL))
3809 return true;
3811 if (vect_print_dump_info (REPORT_DETAILS))
3812 fprintf (vect_dump, "interleave op not supported by target.");
3813 return false;
3817 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3818 type VECTYPE. */
3820 bool
3821 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3823 return vect_lanes_optab_supported_p ("vec_store_lanes",
3824 vec_store_lanes_optab,
3825 vectype, count);
3829 /* Function vect_permute_store_chain.
3831 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3832 a power of 2, generate interleave_high/low stmts to reorder the data
3833 correctly for the stores. Return the final references for stores in
3834 RESULT_CHAIN.
3836 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3837 The input is 4 vectors each containing 8 elements. We assign a number to
3838 each element; the input sequence is:
3840 1st vec: 0 1 2 3 4 5 6 7
3841 2nd vec: 8 9 10 11 12 13 14 15
3842 3rd vec: 16 17 18 19 20 21 22 23
3843 4th vec: 24 25 26 27 28 29 30 31
3845 The output sequence should be:
3847 1st vec: 0 8 16 24 1 9 17 25
3848 2nd vec: 2 10 18 26 3 11 19 27
3849 3rd vec: 4 12 20 28 5 13 21 29
3850 4th vec: 6 14 22 30 7 15 23 31
3852 i.e., we interleave the contents of the four vectors in their order.
3854 We use interleave_high/low instructions to create such output. The input of
3855 each interleave_high/low operation is two vectors:
3856 1st vec 2nd vec
3857 0 1 2 3 4 5 6 7
3858 the even elements of the result vector are obtained left-to-right from the
3859 high/low elements of the first vector. The odd elements of the result are
3860 obtained left-to-right from the high/low elements of the second vector.
3861 The output of interleave_high will be: 0 4 1 5
3862 and of interleave_low: 2 6 3 7
3865 The permutation is done in log2(LENGTH) stages. In each stage interleave_high
3866 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3867 where the first argument is taken from the first half of DR_CHAIN and the
3868 second argument from its second half.
3869 In our example,
3871 I1: interleave_high (1st vec, 3rd vec)
3872 I2: interleave_low (1st vec, 3rd vec)
3873 I3: interleave_high (2nd vec, 4th vec)
3874 I4: interleave_low (2nd vec, 4th vec)
3876 The output for the first stage is:
3878 I1: 0 16 1 17 2 18 3 19
3879 I2: 4 20 5 21 6 22 7 23
3880 I3: 8 24 9 25 10 26 11 27
3881 I4: 12 28 13 29 14 30 15 31
3883 The output of the second stage, i.e. the final result is:
3885 I1: 0 8 16 24 1 9 17 25
3886 I2: 2 10 18 26 3 11 19 27
3887 I3: 4 12 20 28 5 13 21 29
3888 I4: 6 14 22 30 7 15 23 31. */
3890 void
3891 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3892 unsigned int length,
3893 gimple stmt,
3894 gimple_stmt_iterator *gsi,
3895 VEC(tree,heap) **result_chain)
3897 tree perm_dest, vect1, vect2, high, low;
3898 gimple perm_stmt;
3899 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3900 int i;
3901 unsigned int j;
3902 enum tree_code high_code, low_code;
3904 gcc_assert (vect_strided_store_supported (vectype, length));
3906 *result_chain = VEC_copy (tree, heap, dr_chain);
3908 for (i = 0; i < exact_log2 (length); i++)
3910 for (j = 0; j < length/2; j++)
3912 vect1 = VEC_index (tree, dr_chain, j);
3913 vect2 = VEC_index (tree, dr_chain, j+length/2);
3915 /* Create interleaving stmt:
3916 in the case of big endian:
3917 high = interleave_high (vect1, vect2)
3918 and in the case of little endian:
3919 high = interleave_low (vect1, vect2). */
3920 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3921 DECL_GIMPLE_REG_P (perm_dest) = 1;
3922 add_referenced_var (perm_dest);
3923 if (BYTES_BIG_ENDIAN)
3925 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3926 low_code = VEC_INTERLEAVE_LOW_EXPR;
3928 else
3930 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3931 high_code = VEC_INTERLEAVE_LOW_EXPR;
3933 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3934 vect1, vect2);
3935 high = make_ssa_name (perm_dest, perm_stmt);
3936 gimple_assign_set_lhs (perm_stmt, high);
3937 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3938 VEC_replace (tree, *result_chain, 2*j, high);
3940 /* Create interleaving stmt:
3941 in the case of big endian:
3942 low = interleave_low (vect1, vect2)
3943 and in the case of little endian:
3944 low = interleave_high (vect1, vect2). */
3945 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3946 DECL_GIMPLE_REG_P (perm_dest) = 1;
3947 add_referenced_var (perm_dest);
3948 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3949 vect1, vect2);
3950 low = make_ssa_name (perm_dest, perm_stmt);
3951 gimple_assign_set_lhs (perm_stmt, low);
3952 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3953 VEC_replace (tree, *result_chain, 2*j+1, low);
3955 dr_chain = VEC_copy (tree, heap, *result_chain);
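/* Illustrative only, not part of GCC: a standalone sketch of the
   interleave_high/low permutation documented above, applied to plain
   arrays of shorts.  VF, LEN and the helper names are invented for the
   sketch, which is wrapped in #if 0 so it has no effect on this file.
   Running it reproduces the interleaved store order from the example
   (0 8 16 24 1 9 17 25 / 2 10 18 26 ...).  */
#if 0
#include <stdio.h>
#include <string.h>

#define VF  8                   /* elements per vector               */
#define LEN 4                   /* number of vectors in the chain    */

/* interleave_high: pair up the first halves of A and B.  */
static void
interleave_high (const short *a, const short *b, short *out)
{
  for (int k = 0; k < VF / 2; k++)
    {
      out[2 * k]     = a[k];
      out[2 * k + 1] = b[k];
    }
}

/* interleave_low: pair up the second halves of A and B.  */
static void
interleave_low (const short *a, const short *b, short *out)
{
  for (int k = 0; k < VF / 2; k++)
    {
      out[2 * k]     = a[VF / 2 + k];
      out[2 * k + 1] = b[VF / 2 + k];
    }
}

int
main (void)
{
  short chain[LEN][VF], result[LEN][VF];
  for (int i = 0; i < LEN; i++)
    for (int j = 0; j < VF; j++)
      chain[i][j] = i * VF + j;         /* 1st vec: 0..7, 2nd vec: 8..15, ...  */

  /* log2 (LEN) stages, pairing the first half of the chain with the
     second half, as in the loop of vect_permute_store_chain.  */
  for (int stage = 0; stage < 2; stage++)
    {
      for (int j = 0; j < LEN / 2; j++)
        {
          interleave_high (chain[j], chain[j + LEN / 2], result[2 * j]);
          interleave_low  (chain[j], chain[j + LEN / 2], result[2 * j + 1]);
        }
      memcpy (chain, result, sizeof (chain));
    }

  for (int i = 0; i < LEN; i++)
    {
      for (int j = 0; j < VF; j++)
        printf ("%d ", chain[i][j]);
      printf ("\n");
    }
  return 0;
}
#endif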
3959 /* Function vect_setup_realignment
3961 This function is called when vectorizing an unaligned load using
3962 the dr_explicit_realign[_optimized] scheme.
3963 This function generates the following code at the loop prolog:
3965 p = initial_addr;
3966 x msq_init = *(floor(p)); # prolog load
3967 realignment_token = call target_builtin;
3968 loop:
3969 x msq = phi (msq_init, ---)
3971 The stmts marked with x are generated only for the case of
3972 dr_explicit_realign_optimized.
3974 The code above sets up a new (vector) pointer, pointing to the first
3975 location accessed by STMT, and a "floor-aligned" load using that pointer.
3976 It also generates code to compute the "realignment-token" (if the relevant
3977 target hook was defined), and creates a phi-node at the loop-header bb
3978 whose arguments are the result of the prolog-load (created by this
3979 function) and the result of a load that takes place in the loop (to be
3980 created by the caller to this function).
3982 For the case of dr_explicit_realign_optimized:
3983 The caller to this function uses the phi-result (msq) to create the
3984 realignment code inside the loop, and sets up the missing phi argument,
3985 as follows:
3986 loop:
3987 msq = phi (msq_init, lsq)
3988 lsq = *(floor(p')); # load in loop
3989 result = realign_load (msq, lsq, realignment_token);
3991 For the case of dr_explicit_realign:
3992 loop:
3993 msq = *(floor(p)); # load in loop
3994 p' = p + (VS-1);
3995 lsq = *(floor(p')); # load in loop
3996 result = realign_load (msq, lsq, realignment_token);
3998 Input:
3999 STMT - (scalar) load stmt to be vectorized. This load accesses
4000 a memory location that may be unaligned.
4001 GSI - place where new code is to be inserted.
4002 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4003 is used.
4005 Output:
4006 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4007 target hook, if defined.
4008 Return value - the result of the loop-header phi node. */
4010 tree
4011 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4012 tree *realignment_token,
4013 enum dr_alignment_support alignment_support_scheme,
4014 tree init_addr,
4015 struct loop **at_loop)
4017 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4018 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4019 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4020 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4021 struct loop *loop = NULL;
4022 edge pe = NULL;
4023 tree scalar_dest = gimple_assign_lhs (stmt);
4024 tree vec_dest;
4025 gimple inc;
4026 tree ptr;
4027 tree data_ref;
4028 gimple new_stmt;
4029 basic_block new_bb;
4030 tree msq_init = NULL_TREE;
4031 tree new_temp;
4032 gimple phi_stmt;
4033 tree msq = NULL_TREE;
4034 gimple_seq stmts = NULL;
4035 bool inv_p;
4036 bool compute_in_loop = false;
4037 bool nested_in_vect_loop = false;
4038 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4039 struct loop *loop_for_initial_load = NULL;
4041 if (loop_vinfo)
4043 loop = LOOP_VINFO_LOOP (loop_vinfo);
4044 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4047 gcc_assert (alignment_support_scheme == dr_explicit_realign
4048 || alignment_support_scheme == dr_explicit_realign_optimized);
4050 /* We need to generate three things:
4051 1. the misalignment computation
4052 2. the extra vector load (for the optimized realignment scheme).
4053 3. the phi node for the two vectors from which the realignment is
4054 done (for the optimized realignment scheme). */
4056 /* 1. Determine where to generate the misalignment computation.
4058 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4059 calculation will be generated by this function, outside the loop (in the
4060 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4061 caller, inside the loop.
4063 Background: If the misalignment remains fixed throughout the iterations of
4064 the loop, then both realignment schemes are applicable, and also the
4065 misalignment computation can be done outside LOOP. This is because we are
4066 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4067 are a multiple of VS (the Vector Size), and therefore the misalignment in
4068 different vectorized LOOP iterations is always the same.
4069 The problem arises only if the memory access is in an inner-loop nested
4070 inside LOOP, which is now being vectorized using outer-loop vectorization.
4071 This is the only case when the misalignment of the memory access may not
4072 remain fixed throughout the iterations of the inner-loop (as explained in
4073 detail in vect_supportable_dr_alignment). In this case, not only is the
4074 optimized realignment scheme not applicable, but also the misalignment
4075 computation (and generation of the realignment token that is passed to
4076 REALIGN_LOAD) have to be done inside the loop.
4078 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4079 or not, which in turn determines if the misalignment is computed inside
4080 the inner-loop, or outside LOOP. */
4082 if (init_addr != NULL_TREE || !loop_vinfo)
4084 compute_in_loop = true;
4085 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4089 /* 2. Determine where to generate the extra vector load.
4091 For the optimized realignment scheme, instead of generating two vector
4092 loads in each iteration, we generate a single extra vector load in the
4093 preheader of the loop, and in each iteration reuse the result of the
4094 vector load from the previous iteration. In case the memory access is in
4095 an inner-loop nested inside LOOP, which is now being vectorized using
4096 outer-loop vectorization, we need to determine whether this initial vector
4097 load should be generated at the preheader of the inner-loop, or can be
4098 generated at the preheader of LOOP. If the memory access has no evolution
4099 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4100 to be generated inside LOOP (in the preheader of the inner-loop). */
4102 if (nested_in_vect_loop)
4104 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4105 bool invariant_in_outerloop =
4106 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4107 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4109 else
4110 loop_for_initial_load = loop;
4111 if (at_loop)
4112 *at_loop = loop_for_initial_load;
4114 if (loop_for_initial_load)
4115 pe = loop_preheader_edge (loop_for_initial_load);
4117 /* 3. For the case of the optimized realignment, create the first vector
4118 load at the loop preheader. */
4120 if (alignment_support_scheme == dr_explicit_realign_optimized)
4122 /* Create msq_init = *(floor(p1)) in the loop preheader */
4124 gcc_assert (!compute_in_loop);
4125 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4126 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4127 NULL_TREE, &init_addr, NULL, &inc,
4128 true, &inv_p);
4129 new_stmt = gimple_build_assign_with_ops
4130 (BIT_AND_EXPR, NULL_TREE, ptr,
4131 build_int_cst (TREE_TYPE (ptr),
4132 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4133 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
4134 gimple_assign_set_lhs (new_stmt, new_temp);
4135 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4136 gcc_assert (!new_bb);
4137 data_ref
4138 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4139 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4140 new_stmt = gimple_build_assign (vec_dest, data_ref);
4141 new_temp = make_ssa_name (vec_dest, new_stmt);
4142 gimple_assign_set_lhs (new_stmt, new_temp);
4143 mark_symbols_for_renaming (new_stmt);
4144 if (pe)
4146 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4147 gcc_assert (!new_bb);
4149 else
4150 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4152 msq_init = gimple_assign_lhs (new_stmt);
4155 /* 4. Create realignment token using a target builtin, if available.
4156 It is done either inside the containing loop, or before LOOP (as
4157 determined above). */
4159 if (targetm.vectorize.builtin_mask_for_load)
4161 tree builtin_decl;
4163 /* Compute INIT_ADDR - the initial address accessed by this memref. */
4164 if (!init_addr)
4166 /* Generate the INIT_ADDR computation outside LOOP. */
4167 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4168 NULL_TREE, loop);
4169 if (loop)
4171 pe = loop_preheader_edge (loop);
4172 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4173 gcc_assert (!new_bb);
4175 else
4176 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4179 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4180 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4181 vec_dest =
4182 vect_create_destination_var (scalar_dest,
4183 gimple_call_return_type (new_stmt));
4184 new_temp = make_ssa_name (vec_dest, new_stmt);
4185 gimple_call_set_lhs (new_stmt, new_temp);
4187 if (compute_in_loop)
4188 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4189 else
4191 /* Generate the misalignment computation outside LOOP. */
4192 pe = loop_preheader_edge (loop);
4193 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4194 gcc_assert (!new_bb);
4197 *realignment_token = gimple_call_lhs (new_stmt);
4199 /* The result of the CALL_EXPR to this builtin is determined from
4200 the value of the parameter and no global variables are touched
4201 which makes the builtin a "const" function. Requiring the
4202 builtin to have the "const" attribute makes it unnecessary
4203 to call mark_call_clobbered. */
4204 gcc_assert (TREE_READONLY (builtin_decl));
4207 if (alignment_support_scheme == dr_explicit_realign)
4208 return msq;
4210 gcc_assert (!compute_in_loop);
4211 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4214 /* 5. Create msq = phi <msq_init, lsq> in loop */
4216 pe = loop_preheader_edge (containing_loop);
4217 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4218 msq = make_ssa_name (vec_dest, NULL);
4219 phi_stmt = create_phi_node (msq, containing_loop->header);
4220 SSA_NAME_DEF_STMT (msq) = phi_stmt;
4221 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4223 return msq;
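/* Illustrative only, not part of GCC: a standalone scalar model of the
   explicit realignment scheme set up above -- two aligned ("floored")
   loads combined according to the misalignment, which is what
   REALIGN_LOAD does using the realignment token.  VS and the helper
   names are invented for the sketch; it is wrapped in #if 0 so it has no
   effect on this file.  */
#if 0
#include <stdio.h>

#define VS 4   /* vector size in elements, chosen for the sketch */

/* Model of result = REALIGN_LOAD <msq, lsq, token>: the token encodes the
   misalignment OFF, and the result is the tail of MSQ followed by the head
   of LSQ, i.e. the VS elements starting at the unaligned address.  */
static void
realign_load (const int *msq, const int *lsq, int off, int *out)
{
  for (int e = 0; e < VS; e++)
    out[e] = (off + e < VS) ? msq[off + e] : lsq[off + e - VS];
}

/* Return the start of the aligned vector containing P ("floor (p)").  */
static const int *
floor_addr (const int *base, const int *p)
{
  return base + ((p - base) / VS) * VS;
}

int
main (void)
{
  int data[16];
  for (int i = 0; i < 16; i++)
    data[i] = i;

  const int *p = &data[3];                        /* unaligned access      */
  int off = (int) ((p - data) % VS);              /* misalignment in elts  */

  const int *fp  = floor_addr (data, p);          /* floor (p)             */
  const int *fp2 = floor_addr (data, p + VS - 1); /* floor (p + VS - 1)    */

  int msq[VS], lsq[VS], out[VS];
  for (int e = 0; e < VS; e++)
    {
      msq[e] = fp[e];    /* msq = *(floor (p))  -- prolog or previous load */
      lsq[e] = fp2[e];   /* lsq = *(floor (p')) -- load in the loop        */
    }

  realign_load (msq, lsq, off, out);
  for (int e = 0; e < VS; e++)
    printf ("%d ", out[e]);                       /* prints "3 4 5 6"      */
  printf ("\n");
  return 0;
}
#endif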
4227 /* Function vect_strided_load_supported.
4229 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
4230 and FALSE otherwise. */
4232 bool
4233 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4235 optab ee_optab, eo_optab;
4236 enum machine_mode mode;
4238 mode = TYPE_MODE (vectype);
4240 /* vect_permute_load_chain requires the group size to be a power of two. */
4241 if (exact_log2 (count) == -1)
4243 if (vect_print_dump_info (REPORT_DETAILS))
4244 fprintf (vect_dump, "the size of the group of strided accesses"
4245 " is not a power of 2");
4246 return false;
4249 ee_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR,
4250 vectype, optab_default);
4251 eo_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR,
4252 vectype, optab_default);
4253 if (ee_optab && eo_optab
4254 && optab_handler (ee_optab, mode) != CODE_FOR_nothing
4255 && optab_handler (eo_optab, mode) != CODE_FOR_nothing)
4256 return true;
4258 if (can_vec_perm_for_code_p (VEC_EXTRACT_EVEN_EXPR, mode, NULL)
4259 && can_vec_perm_for_code_p (VEC_EXTRACT_ODD_EXPR, mode, NULL))
4260 return true;
4262 if (vect_print_dump_info (REPORT_DETAILS))
4263 fprintf (vect_dump, "extract even/odd not supported by target");
4264 return false;
4267 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4268 type VECTYPE. */
4270 bool
4271 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4273 return vect_lanes_optab_supported_p ("vec_load_lanes",
4274 vec_load_lanes_optab,
4275 vectype, count);
4278 /* Function vect_permute_load_chain.
4280 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4281 a power of 2, generate extract_even/odd stmts to reorder the input data
4282 correctly. Return the final references for loads in RESULT_CHAIN.
4284 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4285 The input is 4 vectors each containing 8 elements. We assign a number to each
4286 element; the input sequence is:
4288 1st vec: 0 1 2 3 4 5 6 7
4289 2nd vec: 8 9 10 11 12 13 14 15
4290 3rd vec: 16 17 18 19 20 21 22 23
4291 4th vec: 24 25 26 27 28 29 30 31
4293 The output sequence should be:
4295 1st vec: 0 4 8 12 16 20 24 28
4296 2nd vec: 1 5 9 13 17 21 25 29
4297 3rd vec: 2 6 10 14 18 22 26 30
4298 4th vec: 3 7 11 15 19 23 27 31
4300 i.e., the first output vector should contain the first elements of each
4301 interleaving group, etc.
4303 We use extract_even/odd instructions to create such output. The input of
4304 each extract_even/odd operation is two vectors
4305 1st vec 2nd vec
4306 0 1 2 3 4 5 6 7
4308 and the output is the vector of extracted even/odd elements. The output of
4309 extract_even will be: 0 2 4 6
4310 and of extract_odd: 1 3 5 7
4313 The permutation is done in log2(LENGTH) stages. In each stage extract_even
4314 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4315 their order. In our example,
4317 E1: extract_even (1st vec, 2nd vec)
4318 E2: extract_odd (1st vec, 2nd vec)
4319 E3: extract_even (3rd vec, 4th vec)
4320 E4: extract_odd (3rd vec, 4th vec)
4322 The output for the first stage will be:
4324 E1: 0 2 4 6 8 10 12 14
4325 E2: 1 3 5 7 9 11 13 15
4326 E3: 16 18 20 22 24 26 28 30
4327 E4: 17 19 21 23 25 27 29 31
4329 In order to proceed and create the correct sequence for the next stage (or
4330 for the correct output, if the second stage is the last one, as in our
4331 example), we first put the output of extract_even operation and then the
4332 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4333 The input for the second stage is:
4335 1st vec (E1): 0 2 4 6 8 10 12 14
4336 2nd vec (E3): 16 18 20 22 24 26 28 30
4337 3rd vec (E2): 1 3 5 7 9 11 13 15
4338 4th vec (E4): 17 19 21 23 25 27 29 31
4340 The output of the second stage:
4342 E1: 0 4 8 12 16 20 24 28
4343 E2: 2 6 10 14 18 22 26 30
4344 E3: 1 5 9 13 17 21 25 29
4345 E4: 3 7 11 15 19 23 27 31
4347 And RESULT_CHAIN after reordering:
4349 1st vec (E1): 0 4 8 12 16 20 24 28
4350 2nd vec (E3): 1 5 9 13 17 21 25 29
4351 3rd vec (E2): 2 6 10 14 18 22 26 30
4352 4th vec (E4): 3 7 11 15 19 23 27 31. */
4354 static void
4355 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4356 unsigned int length,
4357 gimple stmt,
4358 gimple_stmt_iterator *gsi,
4359 VEC(tree,heap) **result_chain)
4361 tree perm_dest, data_ref, first_vect, second_vect;
4362 gimple perm_stmt;
4363 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4364 int i;
4365 unsigned int j;
4367 gcc_assert (vect_strided_load_supported (vectype, length));
4369 *result_chain = VEC_copy (tree, heap, dr_chain);
4370 for (i = 0; i < exact_log2 (length); i++)
4372 for (j = 0; j < length; j +=2)
4374 first_vect = VEC_index (tree, dr_chain, j);
4375 second_vect = VEC_index (tree, dr_chain, j+1);
4377 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4378 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4379 DECL_GIMPLE_REG_P (perm_dest) = 1;
4380 add_referenced_var (perm_dest);
4382 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
4383 perm_dest, first_vect,
4384 second_vect);
4386 data_ref = make_ssa_name (perm_dest, perm_stmt);
4387 gimple_assign_set_lhs (perm_stmt, data_ref);
4388 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4389 mark_symbols_for_renaming (perm_stmt);
4391 VEC_replace (tree, *result_chain, j/2, data_ref);
4393 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4394 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4395 DECL_GIMPLE_REG_P (perm_dest) = 1;
4396 add_referenced_var (perm_dest);
4398 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
4399 perm_dest, first_vect,
4400 second_vect);
4401 data_ref = make_ssa_name (perm_dest, perm_stmt);
4402 gimple_assign_set_lhs (perm_stmt, data_ref);
4403 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4404 mark_symbols_for_renaming (perm_stmt);
4406 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4408 dr_chain = VEC_copy (tree, heap, *result_chain);
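/* Illustrative only, not part of GCC: a standalone sketch of the
   extract_even/odd permutation documented above, applied to plain arrays
   of shorts.  VF, LEN and the helper names are invented for the sketch,
   which is wrapped in #if 0 so it has no effect on this file.  Running it
   reproduces the de-interleaved groups from the example
   (0 4 8 12 16 20 24 28 / 1 5 9 13 ... / 2 6 10 14 ... / 3 7 11 15 ...).  */
#if 0
#include <stdio.h>
#include <string.h>

#define VF  8                   /* elements per vector               */
#define LEN 4                   /* number of vectors in the chain    */

/* Even elements of the 2*VF-element concatenation of A and B.  */
static void
extract_even (const short *a, const short *b, short *out)
{
  for (int k = 0; k < VF; k++)
    out[k] = (2 * k < VF) ? a[2 * k] : b[2 * k - VF];
}

/* Odd elements of the 2*VF-element concatenation of A and B.  */
static void
extract_odd (const short *a, const short *b, short *out)
{
  for (int k = 0; k < VF; k++)
    out[k] = (2 * k + 1 < VF) ? a[2 * k + 1] : b[2 * k + 1 - VF];
}

int
main (void)
{
  short chain[LEN][VF], result[LEN][VF];
  for (int i = 0; i < LEN; i++)
    for (int j = 0; j < VF; j++)
      chain[i][j] = i * VF + j;         /* 1st vec: 0..7, 2nd vec: 8..15, ...  */

  /* log2 (LEN) stages: even parts go to the first half of the result
     chain, odd parts to the second half, as in vect_permute_load_chain.  */
  for (int stage = 0; stage < 2; stage++)
    {
      for (int j = 0; j < LEN; j += 2)
        {
          extract_even (chain[j], chain[j + 1], result[j / 2]);
          extract_odd  (chain[j], chain[j + 1], result[j / 2 + LEN / 2]);
        }
      memcpy (chain, result, sizeof (chain));
    }

  for (int i = 0; i < LEN; i++)
    {
      for (int j = 0; j < VF; j++)
        printf ("%d ", chain[i][j]);
      printf ("\n");
    }
  return 0;
}
#endif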
4413 /* Function vect_transform_strided_load.
4415 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4416 to perform their permutation, and assign the resulting vectorized statements to
4417 the corresponding scalar statements.
4420 void
4421 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4422 gimple_stmt_iterator *gsi)
4424 VEC(tree,heap) *result_chain = NULL;
4426 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4427 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4428 vectors, that are ready for vector computation. */
4429 result_chain = VEC_alloc (tree, heap, size);
4430 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4431 vect_record_strided_load_vectors (stmt, result_chain);
4432 VEC_free (tree, heap, result_chain);
4435 /* RESULT_CHAIN contains the output of a group of strided loads that were
4436 generated as part of the vectorization of STMT. Assign the statement
4437 for each vector to the associated scalar statement. */
4439 void
4440 vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4442 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4443 gimple next_stmt, new_stmt;
4444 unsigned int i, gap_count;
4445 tree tmp_data_ref;
4447 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4448 Since we scan the chain starting from its first node, their order
4449 corresponds to the order of data-refs in RESULT_CHAIN. */
4450 next_stmt = first_stmt;
4451 gap_count = 1;
4452 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4454 if (!next_stmt)
4455 break;
4457 /* Skip the gaps. Loads created for the gaps will be removed by the dead
4458 code elimination pass later. No need to check for the first stmt in
4459 the group, since it always exists.
4460 GROUP_GAP is the number of steps in elements from the previous
4461 access (if there is no gap GROUP_GAP is 1). We skip loads that
4462 correspond to the gaps. */
4463 if (next_stmt != first_stmt
4464 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4466 gap_count++;
4467 continue;
4470 while (next_stmt)
4472 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4473 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4474 copies, and we put the new vector statement in the first available
4475 RELATED_STMT. */
4476 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4477 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4478 else
4480 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4482 gimple prev_stmt =
4483 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4484 gimple rel_stmt =
4485 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4486 while (rel_stmt)
4488 prev_stmt = rel_stmt;
4489 rel_stmt =
4490 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4493 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4494 new_stmt;
4498 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4499 gap_count = 1;
4500 /* If NEXT_STMT accesses the same DR as the previous statement,
4501 put the same TMP_DATA_REF as its vectorized statement; otherwise
4502 get the next data-ref from RESULT_CHAIN. */
4503 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4504 break;
4509 /* Function vect_can_force_dr_alignment_p.
4511 Returns whether the alignment of a DECL can be forced to be aligned
4512 on an ALIGNMENT-bit boundary. */
4514 bool
4515 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4517 if (TREE_CODE (decl) != VAR_DECL)
4518 return false;
4520 if (DECL_EXTERNAL (decl))
4521 return false;
4523 if (TREE_ASM_WRITTEN (decl))
4524 return false;
4526 if (TREE_STATIC (decl))
4527 return (alignment <= MAX_OFILE_ALIGNMENT);
4528 else
4529 return (alignment <= MAX_STACK_ALIGNMENT);
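/* Illustrative only, not part of GCC: at the user level, the alignment the
   vectorizer may force on a DECL corresponds to compiling the variable as
   if it carried an explicit aligned attribute, as sketched below.  The
   names are invented for the sketch; it is wrapped in #if 0 so it has no
   effect on this file.  An extern declaration defined elsewhere
   (DECL_EXTERNAL) cannot be retro-aligned here, which is one of the cases
   rejected above.  */
#if 0
static float a[256] __attribute__ ((aligned (16)));  /* local definition: alignment can be raised */
extern float b[256];                                  /* defined elsewhere: alignment is fixed     */

float
sum (void)
{
  float s = 0.0f;
  for (int i = 0; i < 256; i++)
    s += a[i] + b[i];
  return s;
}
#endif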
4533 /* Return whether the data reference DR is supported with respect to its
4534 alignment.
4535 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4536 if it is aligned, i.e., check if it is possible to vectorize it with different
4537 alignment. */
4539 enum dr_alignment_support
4540 vect_supportable_dr_alignment (struct data_reference *dr,
4541 bool check_aligned_accesses)
4543 gimple stmt = DR_STMT (dr);
4544 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4545 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4546 enum machine_mode mode = TYPE_MODE (vectype);
4547 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4548 struct loop *vect_loop = NULL;
4549 bool nested_in_vect_loop = false;
4551 if (aligned_access_p (dr) && !check_aligned_accesses)
4552 return dr_aligned;
4554 if (loop_vinfo)
4556 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4557 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4560 /* Possibly unaligned access. */
4562 /* We can choose between using the implicit realignment scheme (generating
4563 a misaligned_move stmt) and the explicit realignment scheme (generating
4564 aligned loads with a REALIGN_LOAD). There are two variants to the
4565 explicit realignment scheme: optimized, and unoptimized.
4566 We can optimize the realignment only if the step between consecutive
4567 vector loads is equal to the vector size. Since the vector memory
4568 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4569 is guaranteed that the misalignment amount remains the same throughout the
4570 execution of the vectorized loop. Therefore, we can create the
4571 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4572 at the loop preheader.
4574 However, in the case of outer-loop vectorization, when vectorizing a
4575 memory access in the inner-loop nested within the LOOP that is now being
4576 vectorized, while it is guaranteed that the misalignment of the
4577 vectorized memory access will remain the same in different outer-loop
4578 iterations, it is *not* guaranteed that it will remain the same throughout
4579 the execution of the inner-loop. This is because the inner-loop advances
4580 with the original scalar step (and not in steps of VS). If the inner-loop
4581 step happens to be a multiple of VS, then the misalignment remains fixed
4582 and we can use the optimized realignment scheme. For example:
4584 for (i=0; i<N; i++)
4585 for (j=0; j<M; j++)
4586 s += a[i+j];
4588 When vectorizing the i-loop in the above example, the step between
4589 consecutive vector loads is 1, and so the misalignment does not remain
4590 fixed across the execution of the inner-loop, and the realignment cannot
4591 be optimized (as illustrated in the following pseudo vectorized loop):
4593 for (i=0; i<N; i+=4)
4594 for (j=0; j<M; j++){
4595 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4596 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4597 // (assuming that we start from an aligned address).
4600 We therefore have to use the unoptimized realignment scheme:
4602 for (i=0; i<N; i+=4)
4603 for (j=k; j<M; j+=4)
4604 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4605 // that the misalignment of the initial address is
4606 // 0).
4608 The loop can then be vectorized as follows:
4610 for (k=0; k<4; k++){
4611 rt = get_realignment_token (&vp[k]);
4612 for (i=0; i<N; i+=4){
4613 v1 = vp[i+k];
4614 for (j=k; j<M; j+=4){
4615 v2 = vp[i+j+VS-1];
4616 va = REALIGN_LOAD <v1,v2,rt>;
4617 vs += va;
4618 v1 = v2;
4621 } */
4623 if (DR_IS_READ (dr))
4625 bool is_packed = false;
4626 tree type = (TREE_TYPE (DR_REF (dr)));
4628 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4629 && (!targetm.vectorize.builtin_mask_for_load
4630 || targetm.vectorize.builtin_mask_for_load ()))
4632 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4633 if ((nested_in_vect_loop
4634 && (TREE_INT_CST_LOW (DR_STEP (dr))
4635 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4636 || !loop_vinfo)
4637 return dr_explicit_realign;
4638 else
4639 return dr_explicit_realign_optimized;
4641 if (!known_alignment_for_access_p (dr))
4643 tree ba = DR_BASE_OBJECT (dr);
4645 if (ba)
4646 is_packed = contains_packed_reference (ba);
4649 if (targetm.vectorize.
4650 support_vector_misalignment (mode, type,
4651 DR_MISALIGNMENT (dr), is_packed))
4652 /* Can't software pipeline the loads, but can at least do them. */
4653 return dr_unaligned_supported;
4655 else
4657 bool is_packed = false;
4658 tree type = (TREE_TYPE (DR_REF (dr)));
4660 if (!known_alignment_for_access_p (dr))
4662 tree ba = DR_BASE_OBJECT (dr);
4664 if (ba)
4665 is_packed = contains_packed_reference (ba);
4668 if (targetm.vectorize.
4669 support_vector_misalignment (mode, type,
4670 DR_MISALIGNMENT (dr), is_packed))
4671 return dr_unaligned_supported;
4674 /* Unsupported. */
4675 return dr_unaligned_unsupported;
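/* Illustrative only, not part of GCC: a tiny standalone check of the
   misalignment pattern claimed in the comment above for the nested-loop
   example.  VS is chosen for the sketch and it is wrapped in #if 0 so it
   has no effect on this file.  The element index of the access is i + j,
   so its misalignment (index mod VS) cycles 0 1 2 3 within the inner loop
   but repeats identically for every outer iteration, which is why the
   realignment token cannot be hoisted out of the inner loop.  */
#if 0
#include <stdio.h>

#define VS 4   /* vector size in elements, chosen for the sketch */

int
main (void)
{
  for (int i = 0; i < 8; i += VS)        /* outer (vectorized) loop   */
    {
      printf ("i=%d:", i);
      for (int j = 0; j < 8; j++)        /* inner loop, scalar step   */
        printf (" %d", (i + j) % VS);    /* misalignment of &a[i+j]   */
      printf ("\n");                     /* each row: 0 1 2 3 0 1 2 3 */
    }
  return 0;
}
#endif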