/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "tm_p.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"
/* Return true if load- or store-lanes optab OPTAB is implemented for
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
                              tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode, array_mode;
  bool limit_p;

  mode = TYPE_MODE (vectype);
  limit_p = !targetm.array_mode_supported_p (mode, count);
  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
                              MODE_INT, limit_p);

  if (array_mode == BLKmode)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
                 GET_MODE_NAME (mode), count);
      return false;
    }

  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "cannot use %s<%s><%s>",
                 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "can use %s<%s><%s>",
             name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));

  return true;
}
/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different from the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation, we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */
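/* A hypothetical illustration (variable names are made up): in

     for (i = 0; i < N; i++)
       int_out[i] = (int) short_in[i];

   no lhs in the loop has the 16-bit type, yet the vectorization factor must
   be derived from the short elements of SHORT_IN, which is why the rhs type
   of the conversion is examined here.  */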
tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }

  if (next_stmt)
    return result;
  else
    return -1;
}
/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
  next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
          GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
  GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
}
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are part of a chain of interleaved data
   accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
        FIRST_DR = DRB
        NEXT (FIRST_DR) = previous FIRST_DR
      else
        insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
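/* A hypothetical example (array and indices are illustrative only): for a
   group of 4-byte stores

     a[4*i + 2] = ...;   DR_INIT == 8
     a[4*i]     = ...;   DR_INIT == 0
     a[4*i + 1] = ...;   DR_INIT == 4

   the chain is kept sorted by DR_INIT, so after the updates the first element
   of the group is the store to a[4*i] and the remaining stores are linked
   behind it in increasing DR_INIT order.  */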
212 static void
213 vect_update_interleaving_chain (struct data_reference *drb,
214 struct data_reference *dra)
216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218 tree next_init, init_dra_chain, init_drb_chain;
219 gimple first_a, first_b;
220 tree node_init;
221 gimple node, prev, next, first_stmt;
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
229 return;
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra, drb);
238 return;
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
246 old_first_stmt)));
247 gimple tmp;
249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp = old_first_stmt;
259 while (tmp)
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
265 else
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb, dra);
269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
271 return;
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277 if (first_a == first_b)
278 return;
279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
287 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289 first_stmt = first_b;
291 else
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
296 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298 first_stmt = first_a;
301 while (node)
303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
305 while (next)
307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 if (tree_int_cst_compare (next_init, node_init) > 0)
310 /* Insert here. */
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
313 prev = node;
314 break;
316 prev = next;
317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
319 if (!next)
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
324 prev = node;
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
/* Check dependence between DRA and DRB for basic block vectorization.
   If the accesses share the same bases and offsets, we can compare their
   initial constant offsets to decide whether they differ or not.  In case of
   a read-write dependence we check that the load is before the store to
   ensure that vectorization will not change the order of the accesses.  */
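/* A hypothetical basic-block example: for

     x = a[4];
     a[4] = y;

   the load precedes the store, so the order of the accesses is preserved by
   basic block vectorization and the pair is not reported as dependent; if
   the store came first, the dependence would be reported.  */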
static bool
vect_drs_dependent_in_basic_block (struct data_reference *dra,
                                   struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* We only call this function for pairs of loads and stores, but we verify
     it here.  */
  if (DR_IS_READ (dra) == DR_IS_READ (drb))
    {
      if (DR_IS_READ (dra))
        return false;
      else
        return true;
    }

  /* Check that the data-refs have the same bases and offsets.  If not, we
     can't determine if they are dependent.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
      || !dr_equal_offsets_p (dra, drb))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the
     store.  When we vectorize basic blocks, a vector load can only be before
     the corresponding scalar load, and a vector store can only be after its
     corresponding scalar store.  So the order of the accesses is preserved
     in case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}
/* Function vect_check_interleaving.

   Check if DRA and DRB are part of an interleaving.  If they are, insert
   DRA and DRB into an interleaving chain.  */
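/* A hypothetical example of what is detected here: the two 4-byte stores

     a[2*i]     = ...;   DR_INIT == 0, DR_STEP == 8
     a[2*i + 1] = ...;   DR_INIT == 4, DR_STEP == 8

   share base, offset and step, and their inits differ by a multiple of the
   element size that does not exceed the step, so they are recorded as an
   interleaving group of size two.  */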
394 static bool
395 vect_check_interleaving (struct data_reference *dra,
396 struct data_reference *drb)
398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
400 /* Check that the data-refs have same first location (except init) and they
401 are both either store or load (not load and store). */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403 || !dr_equal_offsets_p (dra, drb)
404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405 || DR_IS_READ (dra) != DR_IS_READ (drb))
406 return false;
408 /* Check:
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between
412 data-refs' inits. */
413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
416 if (type_size_a != type_size_b
417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419 TREE_TYPE (DR_REF (drb))))
420 return false;
422 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424 step = TREE_INT_CST_LOW (DR_STEP (dra));
426 if (init_a > init_b)
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size = (init_a - init_b) % type_size_a;
432 if (step && (init_a - init_b) > step)
433 return false;
435 if (diff_mod_size == 0)
437 vect_update_interleaving_chain (drb, dra);
438 if (vect_print_dump_info (REPORT_DR_DETAILS))
440 fprintf (vect_dump, "Detected interleaving ");
441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 fprintf (vect_dump, " and ");
443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
445 return true;
448 else
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size = (init_b - init_a) % type_size_a;
454 if (step && (init_b - init_a) > step)
455 return false;
457 if (diff_mod_size == 0)
459 vect_update_interleaving_chain (dra, drb);
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 fprintf (vect_dump, "Detected interleaving ");
463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
467 return true;
471 return false;
474 /* Check if data references pointed by DR_I and DR_J are same or
475 belong to same interleaving group. Return FALSE if drs are
476 different, otherwise return TRUE. */
478 static bool
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
481 gimple stmt_i = DR_STMT (dr_i);
482 gimple stmt_j = DR_STMT (dr_j);
484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
489 return true;
490 else
491 return false;
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
497 static bool
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
504 return true;
505 else
506 return false;
509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
510 tested at run-time. Return TRUE if DDR was successfully inserted.
511 Return false if versioning is not supported. */
513 static bool
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
519 return false;
521 if (vect_print_dump_info (REPORT_DR_DETAILS))
523 fprintf (vect_dump, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525 fprintf (vect_dump, " and ");
526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
529 if (optimize_loop_nest_for_size_p (loop))
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning not supported when optimizing for size.");
533 return false;
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
537 if (loop->inner)
539 if (vect_print_dump_info (REPORT_DR_DETAILS))
540 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
541 return false;
544 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
545 return true;
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When the dependence can instead be checked
   at run time by versioning for alias, return FALSE.  Adjust *MAX_VF
   according to the data dependence.  */
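/* A hypothetical loop-based example: in

     for (i = 0; i < N; i++)
       a[i + 3] = a[i] + 1;

   the dependence distance is 3, so the loop can still be vectorized but
   *MAX_VF is reduced to 3; a distance of 0 is harmless here, and a distance
   that is at least the vectorization factor does not constrain it at all.  */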
556 static bool
557 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
558 loop_vec_info loop_vinfo, int *max_vf)
560 unsigned int i;
561 struct loop *loop = NULL;
562 struct data_reference *dra = DDR_A (ddr);
563 struct data_reference *drb = DDR_B (ddr);
564 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
565 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
566 lambda_vector dist_v;
567 unsigned int loop_depth;
569 /* Don't bother to analyze statements marked as unvectorizable. */
570 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
571 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
572 return false;
574 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
576 /* Independent data accesses. */
577 vect_check_interleaving (dra, drb);
578 return false;
581 if (loop_vinfo)
582 loop = LOOP_VINFO_LOOP (loop_vinfo);
584 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
585 return false;
587 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
589 gimple earlier_stmt;
591 if (loop_vinfo)
593 if (vect_print_dump_info (REPORT_DR_DETAILS))
595 fprintf (vect_dump, "versioning for alias required: "
596 "can't determine dependence between ");
597 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
598 fprintf (vect_dump, " and ");
599 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
602 /* Add to list of ddrs that need to be tested at run-time. */
603 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
      /* When vectorizing a basic block, an unknown dependence can still mean
         a strided access.  */
608 if (vect_check_interleaving (dra, drb))
609 return false;
611 /* Read-read is OK (we need this check here, after checking for
612 interleaving). */
613 if (DR_IS_READ (dra) && DR_IS_READ (drb))
614 return false;
616 if (vect_print_dump_info (REPORT_DR_DETAILS))
618 fprintf (vect_dump, "can't determine dependence between ");
619 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
620 fprintf (vect_dump, " and ");
621 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
624 /* We do not vectorize basic blocks with write-write dependencies. */
625 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
626 return true;
628 /* Check that it's not a load-after-store dependence. */
629 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
630 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
631 return true;
633 return false;
  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is not applicable; hence, in case of a known data
     dependence, basic block vectorization is impossible for now.  */
639 if (!loop_vinfo)
641 if (dra != drb && vect_check_interleaving (dra, drb))
642 return false;
644 if (vect_print_dump_info (REPORT_DR_DETAILS))
646 fprintf (vect_dump, "determined dependence between ");
647 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
648 fprintf (vect_dump, " and ");
649 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
      /* Do not vectorize basic blocks with write-write dependences.  */
653 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
654 return true;
656 /* Check if this dependence is allowed in basic block vectorization. */
657 return vect_drs_dependent_in_basic_block (dra, drb);
660 /* Loop-based vectorization and known data dependence. */
661 if (DDR_NUM_DIST_VECTS (ddr) == 0)
663 if (vect_print_dump_info (REPORT_DR_DETAILS))
665 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
666 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
667 fprintf (vect_dump, " and ");
668 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
670 /* Add to list of ddrs that need to be tested at run-time. */
671 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
674 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
675 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
677 int dist = dist_v[loop_depth];
679 if (vect_print_dump_info (REPORT_DR_DETAILS))
680 fprintf (vect_dump, "dependence distance = %d.", dist);
682 if (dist == 0)
684 if (vect_print_dump_info (REPORT_DR_DETAILS))
686 fprintf (vect_dump, "dependence distance == 0 between ");
687 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
688 fprintf (vect_dump, " and ");
689 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We check before that one of the data-refs is a
             store.  */
694 if (DR_IS_READ (dra))
695 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
696 else
698 if (DR_IS_READ (drb))
699 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
702 continue;
705 if (dist > 0 && DDR_REVERSED_P (ddr))
707 /* If DDR_REVERSED_P the order of the data-refs in DDR was
708 reversed (to make distance vector positive), and the actual
709 distance is negative. */
710 if (vect_print_dump_info (REPORT_DR_DETAILS))
711 fprintf (vect_dump, "dependence distance negative.");
712 continue;
715 if (abs (dist) >= 2
716 && abs (dist) < *max_vf)
718 /* The dependence distance requires reduction of the maximal
719 vectorization factor. */
720 *max_vf = abs (dist);
721 if (vect_print_dump_info (REPORT_DR_DETAILS))
722 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
723 *max_vf);
726 if (abs (dist) >= *max_vf)
728 /* Dependence distance does not create dependence, as far as
729 vectorization is concerned, in this case. */
730 if (vect_print_dump_info (REPORT_DR_DETAILS))
731 fprintf (vect_dump, "dependence distance >= VF.");
732 continue;
735 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
737 fprintf (vect_dump, "not vectorized, possible dependence "
738 "between data-refs ");
739 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
740 fprintf (vect_dump, " and ");
741 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
744 return true;
747 return false;
750 /* Function vect_analyze_data_ref_dependences.
752 Examine all the data references in the loop, and make sure there do not
753 exist any data dependences between them. Set *MAX_VF according to
754 the maximum vectorization factor the data dependences allow. */
756 bool
757 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
758 bb_vec_info bb_vinfo, int *max_vf)
760 unsigned int i;
761 VEC (ddr_p, heap) *ddrs = NULL;
762 struct data_dependence_relation *ddr;
764 if (vect_print_dump_info (REPORT_DETAILS))
765 fprintf (vect_dump, "=== vect_analyze_dependences ===");
767 if (loop_vinfo)
768 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
769 else
770 ddrs = BB_VINFO_DDRS (bb_vinfo);
772 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
773 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
774 return false;
776 return true;
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
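/* A hypothetical example of the value computed below: for an access whose
   vector type is 16 bytes wide, whose base can be forced to 16-byte
   alignment and whose DR_INIT is 20, the recorded misalignment is
   20 mod 16 == 4 bytes; if the base alignment cannot be determined or
   forced, DR_MISALIGNMENT stays -1 (unknown).  */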
792 static bool
793 vect_compute_data_ref_alignment (struct data_reference *dr)
795 gimple stmt = DR_STMT (dr);
796 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
797 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
798 struct loop *loop = NULL;
799 tree ref = DR_REF (dr);
800 tree vectype;
801 tree base, base_addr;
802 bool base_aligned;
803 tree misalign;
804 tree aligned_to, alignment;
806 if (vect_print_dump_info (REPORT_DETAILS))
807 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
809 if (loop_vinfo)
810 loop = LOOP_VINFO_LOOP (loop_vinfo);
812 /* Initialize misalignment to unknown. */
813 SET_DR_MISALIGNMENT (dr, -1);
815 misalign = DR_INIT (dr);
816 aligned_to = DR_ALIGNED_TO (dr);
817 base_addr = DR_BASE_ADDRESS (dr);
818 vectype = STMT_VINFO_VECTYPE (stmt_info);
820 /* In case the dataref is in an inner-loop of the loop that is being
821 vectorized (LOOP), we use the base and misalignment information
822 relative to the outer-loop (LOOP). This is ok only if the misalignment
823 stays the same throughout the execution of the inner-loop, which is why
824 we have to check that the stride of the dataref in the inner-loop evenly
825 divides by the vector size. */
826 if (loop && nested_in_vect_loop_p (loop, stmt))
828 tree step = DR_STEP (dr);
829 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
831 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
833 if (vect_print_dump_info (REPORT_ALIGNMENT))
834 fprintf (vect_dump, "inner step divides the vector-size.");
835 misalign = STMT_VINFO_DR_INIT (stmt_info);
836 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
837 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
839 else
841 if (vect_print_dump_info (REPORT_ALIGNMENT))
842 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
843 misalign = NULL_TREE;
847 base = build_fold_indirect_ref (base_addr);
848 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
850 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
851 || !misalign)
853 if (vect_print_dump_info (REPORT_ALIGNMENT))
855 fprintf (vect_dump, "Unknown alignment for access: ");
856 print_generic_expr (vect_dump, base, TDF_SLIM);
858 return true;
861 if ((DECL_P (base)
862 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
863 alignment) >= 0)
864 || (TREE_CODE (base_addr) == SSA_NAME
865 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
866 TREE_TYPE (base_addr)))),
867 alignment) >= 0)
868 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
869 base_aligned = true;
870 else
871 base_aligned = false;
873 if (!base_aligned)
875 /* Do not change the alignment of global variables here if
876 flag_section_anchors is enabled as we already generated
877 RTL for other functions. Most global variables should
878 have been aligned during the IPA increase_alignment pass. */
879 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
880 || (TREE_STATIC (base) && flag_section_anchors))
882 if (vect_print_dump_info (REPORT_DETAILS))
884 fprintf (vect_dump, "can't force alignment of ref: ");
885 print_generic_expr (vect_dump, ref, TDF_SLIM);
887 return true;
890 /* Force the alignment of the decl.
891 NOTE: This is the only change to the code we make during
892 the analysis phase, before deciding to vectorize the loop. */
893 if (vect_print_dump_info (REPORT_DETAILS))
895 fprintf (vect_dump, "force alignment of ");
896 print_generic_expr (vect_dump, ref, TDF_SLIM);
899 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
900 DECL_USER_ALIGN (base) = 1;
903 /* At this point we assume that the base is aligned. */
904 gcc_assert (base_aligned
905 || (TREE_CODE (base) == VAR_DECL
906 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
908 /* If this is a backward running DR then first access in the larger
909 vectype actually is N-1 elements before the address in the DR.
910 Adjust misalign accordingly. */
911 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
913 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
914 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
915 otherwise we wouldn't be here. */
916 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
917 /* PLUS because DR_STEP was negative. */
918 misalign = size_binop (PLUS_EXPR, misalign, offset);
921 /* Modulo alignment. */
922 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
924 if (!host_integerp (misalign, 1))
926 /* Negative or overflowed misalignment value. */
927 if (vect_print_dump_info (REPORT_DETAILS))
928 fprintf (vect_dump, "unexpected misalign value");
929 return false;
932 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
934 if (vect_print_dump_info (REPORT_DETAILS))
936 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
937 print_generic_expr (vect_dump, ref, TDF_SLIM);
940 return true;
944 /* Function vect_compute_data_refs_alignment
946 Compute the misalignment of data references in the loop.
947 Return FALSE if a data reference is found that cannot be vectorized. */
949 static bool
950 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
951 bb_vec_info bb_vinfo)
953 VEC (data_reference_p, heap) *datarefs;
954 struct data_reference *dr;
955 unsigned int i;
957 if (loop_vinfo)
958 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
959 else
960 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
962 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
963 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
964 && !vect_compute_data_ref_alignment (dr))
966 if (bb_vinfo)
968 /* Mark unsupported statement as unvectorizable. */
969 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
970 continue;
972 else
973 return false;
976 return true;
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
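/* A hypothetical example: peeling NPEEL == 3 iterations for a data-ref with
   4-byte elements and a known misalignment of 4 bytes in a 16-byte vector
   gives (4 + 3 * 4) mod 16 == 0, i.e. the reference becomes aligned; a
   data-ref whose misalignment is unknown is simply reset to -1.  */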
988 static void
989 vect_update_misalignment_for_peel (struct data_reference *dr,
990 struct data_reference *dr_peel, int npeel)
992 unsigned int i;
993 VEC(dr_p,heap) *same_align_drs;
994 struct data_reference *current_dr;
995 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
996 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
997 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
998 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1000 /* For interleaved data accesses the step in the loop must be multiplied by
1001 the size of the interleaving group. */
1002 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1003 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1004 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1005 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1007 /* It can be assumed that the data refs with the same alignment as dr_peel
1008 are aligned in the vector loop. */
1009 same_align_drs
1010 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1011 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1013 if (current_dr != dr)
1014 continue;
1015 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1016 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1017 SET_DR_MISALIGNMENT (dr, 0);
1018 return;
1021 if (known_alignment_for_access_p (dr)
1022 && known_alignment_for_access_p (dr_peel))
1024 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1025 int misal = DR_MISALIGNMENT (dr);
1026 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1027 misal += negative ? -npeel * dr_size : npeel * dr_size;
1028 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1029 SET_DR_MISALIGNMENT (dr, misal);
1030 return;
1033 if (vect_print_dump_info (REPORT_DETAILS))
1034 fprintf (vect_dump, "Setting misalignment to -1.");
1035 SET_DR_MISALIGNMENT (dr, -1);
1039 /* Function vect_verify_datarefs_alignment
1041 Return TRUE if all data references in the loop can be
1042 handled with respect to alignment. */
1044 bool
1045 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1047 VEC (data_reference_p, heap) *datarefs;
1048 struct data_reference *dr;
1049 enum dr_alignment_support supportable_dr_alignment;
1050 unsigned int i;
1052 if (loop_vinfo)
1053 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1054 else
1055 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1057 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1059 gimple stmt = DR_STMT (dr);
1060 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1062 /* For interleaving, only the alignment of the first access matters.
1063 Skip statements marked as not vectorizable. */
1064 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1065 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1066 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1067 continue;
1069 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1070 if (!supportable_dr_alignment)
1072 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1074 if (DR_IS_READ (dr))
1075 fprintf (vect_dump,
1076 "not vectorized: unsupported unaligned load.");
1077 else
1078 fprintf (vect_dump,
1079 "not vectorized: unsupported unaligned store.");
1081 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1083 return false;
1085 if (supportable_dr_alignment != dr_aligned
1086 && vect_print_dump_info (REPORT_ALIGNMENT))
1087 fprintf (vect_dump, "Vectorizing an unaligned access.");
1089 return true;
1093 /* Function vector_alignment_reachable_p
1095 Return true if vector alignment for DR is reachable by peeling
1096 a few loop iterations. Return false otherwise. */
1098 static bool
1099 vector_alignment_reachable_p (struct data_reference *dr)
1101 gimple stmt = DR_STMT (dr);
1102 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1103 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1105 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1107 /* For interleaved access we peel only if number of iterations in
1108 the prolog loop ({VF - misalignment}), is a multiple of the
1109 number of the interleaved accesses. */
1110 int elem_size, mis_in_elements;
1111 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1113 /* FORNOW: handle only known alignment. */
1114 if (!known_alignment_for_access_p (dr))
1115 return false;
1117 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1118 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1120 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1121 return false;
1124 /* If misalignment is known at the compile time then allow peeling
1125 only if natural alignment is reachable through peeling. */
1126 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1128 HOST_WIDE_INT elmsize =
1129 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1130 if (vect_print_dump_info (REPORT_DETAILS))
1132 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1133 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1135 if (DR_MISALIGNMENT (dr) % elmsize)
1137 if (vect_print_dump_info (REPORT_DETAILS))
1138 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1139 return false;
1143 if (!known_alignment_for_access_p (dr))
1145 tree type = (TREE_TYPE (DR_REF (dr)));
1146 bool is_packed = contains_packed_reference (DR_REF (dr));
1148 if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1149 is_packed = true;
1151 if (vect_print_dump_info (REPORT_DETAILS))
1152 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1153 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1154 return true;
1155 else
1156 return false;
1159 return true;
1163 /* Calculate the cost of the memory access represented by DR. */
1165 static void
1166 vect_get_data_access_cost (struct data_reference *dr,
1167 unsigned int *inside_cost,
1168 unsigned int *outside_cost)
1170 gimple stmt = DR_STMT (dr);
1171 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1172 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1173 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1174 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1175 int ncopies = vf / nunits;
1176 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1178 if (!supportable_dr_alignment)
1179 *inside_cost = VECT_MAX_COST;
1180 else
1182 if (DR_IS_READ (dr))
1183 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1184 else
1185 vect_get_store_cost (dr, ncopies, inside_cost);
1188 if (vect_print_dump_info (REPORT_COST))
1189 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1190 "outside_cost = %d.", *inside_cost, *outside_cost);
1194 static hashval_t
1195 vect_peeling_hash (const void *elem)
1197 const struct _vect_peel_info *peel_info;
1199 peel_info = (const struct _vect_peel_info *) elem;
1200 return (hashval_t) peel_info->npeel;
1204 static int
1205 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1207 const struct _vect_peel_info *a, *b;
1209 a = (const struct _vect_peel_info *) elem1;
1210 b = (const struct _vect_peel_info *) elem2;
1211 return (a->npeel == b->npeel);
1215 /* Insert DR into peeling hash table with NPEEL as key. */
1217 static void
1218 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1219 int npeel)
1221 struct _vect_peel_info elem, *slot;
1222 void **new_slot;
1223 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1225 elem.npeel = npeel;
1226 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1227 &elem);
1228 if (slot)
1229 slot->count++;
1230 else
1232 slot = XNEW (struct _vect_peel_info);
1233 slot->npeel = npeel;
1234 slot->dr = dr;
1235 slot->count = 1;
1236 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1237 INSERT);
1238 *new_slot = slot;
1241 if (!supportable_dr_alignment && !flag_vect_cost_model)
1242 slot->count += VECT_MAX_COST;
1246 /* Traverse peeling hash table to find peeling option that aligns maximum
1247 number of data accesses. */
1249 static int
1250 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1252 vect_peel_info elem = (vect_peel_info) *slot;
1253 vect_peel_extended_info max = (vect_peel_extended_info) data;
1255 if (elem->count > max->peel_info.count
1256 || (elem->count == max->peel_info.count
1257 && max->peel_info.npeel > elem->npeel))
1259 max->peel_info.npeel = elem->npeel;
1260 max->peel_info.count = elem->count;
1261 max->peel_info.dr = elem->dr;
1264 return 1;
1268 /* Traverse peeling hash table and calculate cost for each peeling option.
1269 Find the one with the lowest cost. */
1271 static int
1272 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1274 vect_peel_info elem = (vect_peel_info) *slot;
1275 vect_peel_extended_info min = (vect_peel_extended_info) data;
1276 int save_misalignment, dummy;
1277 unsigned int inside_cost = 0, outside_cost = 0, i;
1278 gimple stmt = DR_STMT (elem->dr);
1279 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1280 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1281 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1282 struct data_reference *dr;
1284 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1286 stmt = DR_STMT (dr);
1287 stmt_info = vinfo_for_stmt (stmt);
1288 /* For interleaving, only the alignment of the first access
1289 matters. */
1290 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1291 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1292 continue;
1294 save_misalignment = DR_MISALIGNMENT (dr);
1295 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1296 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1297 SET_DR_MISALIGNMENT (dr, save_misalignment);
1300 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1301 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1303 if (inside_cost < min->inside_cost
1304 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1306 min->inside_cost = inside_cost;
1307 min->outside_cost = outside_cost;
1308 min->peel_info.dr = elem->dr;
1309 min->peel_info.npeel = elem->npeel;
1312 return 1;
1316 /* Choose best peeling option by traversing peeling hash table and either
1317 choosing an option with the lowest cost (if cost model is enabled) or the
1318 option that aligns as many accesses as possible. */
1320 static struct data_reference *
1321 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1322 unsigned int *npeel)
1324 struct _vect_peel_extended_info res;
1326 res.peel_info.dr = NULL;
1328 if (flag_vect_cost_model)
1330 res.inside_cost = INT_MAX;
1331 res.outside_cost = INT_MAX;
1332 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1333 vect_peeling_hash_get_lowest_cost, &res);
1335 else
1337 res.peel_info.count = 0;
1338 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1339 vect_peeling_hash_get_most_frequent, &res);
1342 *npeel = res.peel_info.npeel;
1343 return res.peel_info.dr;
1347 /* Function vect_enhance_data_refs_alignment
1349 This pass will use loop versioning and loop peeling in order to enhance
1350 the alignment of data references in the loop.
1352 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1353 original loop is to be vectorized. Any other loops that are created by
1354 the transformations performed in this pass - are not supposed to be
1355 vectorized. This restriction will be relaxed.
1357 This pass will require a cost model to guide it whether to apply peeling
1358 or versioning or a combination of the two. For example, the scheme that
1359 intel uses when given a loop with several memory accesses, is as follows:
1360 choose one memory access ('p') which alignment you want to force by doing
1361 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1362 other accesses are not necessarily aligned, or (2) use loop versioning to
1363 generate one loop in which all accesses are aligned, and another loop in
1364 which only 'p' is necessarily aligned.
1366 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1367 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1368 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1370 Devising a cost model is the most critical aspect of this work. It will
1371 guide us on which access to peel for, whether to use loop versioning, how
1372 many versions to create, etc. The cost model will probably consist of
1373 generic considerations as well as target specific considerations (on
1374 powerpc for example, misaligned stores are more painful than misaligned
1375 loads).
1377 Here are the general steps involved in alignment enhancements:
1379 -- original loop, before alignment analysis:
1380 for (i=0; i<N; i++){
1381 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1382 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1385 -- After vect_compute_data_refs_alignment:
1386 for (i=0; i<N; i++){
1387 x = q[i]; # DR_MISALIGNMENT(q) = 3
1388 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1391 -- Possibility 1: we do loop versioning:
1392 if (p is aligned) {
1393 for (i=0; i<N; i++){ # loop 1A
1394 x = q[i]; # DR_MISALIGNMENT(q) = 3
1395 p[i] = y; # DR_MISALIGNMENT(p) = 0
1398 else {
1399 for (i=0; i<N; i++){ # loop 1B
1400 x = q[i]; # DR_MISALIGNMENT(q) = 3
1401 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1405 -- Possibility 2: we do loop peeling:
1406 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1407 x = q[i];
1408 p[i] = y;
1410 for (i = 3; i < N; i++){ # loop 2A
1411 x = q[i]; # DR_MISALIGNMENT(q) = 0
1412 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1415 -- Possibility 3: combination of loop peeling and versioning:
1416 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1417 x = q[i];
1418 p[i] = y;
1420 if (p is aligned) {
1421 for (i = 3; i<N; i++){ # loop 3A
1422 x = q[i]; # DR_MISALIGNMENT(q) = 0
1423 p[i] = y; # DR_MISALIGNMENT(p) = 0
1426 else {
1427 for (i = 3; i<N; i++){ # loop 3B
1428 x = q[i]; # DR_MISALIGNMENT(q) = 0
1429 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1433 These loops are later passed to loop_transform to be vectorized. The
1434 vectorizer will use the alignment information to guide the transformation
1435 (whether to generate regular loads/stores, or with special handling for
1436 misalignment). */
1438 bool
1439 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1441 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1442 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1443 enum dr_alignment_support supportable_dr_alignment;
1444 struct data_reference *dr0 = NULL, *first_store = NULL;
1445 struct data_reference *dr;
1446 unsigned int i, j;
1447 bool do_peeling = false;
1448 bool do_versioning = false;
1449 bool stat;
1450 gimple stmt;
1451 stmt_vec_info stmt_info;
1452 int vect_versioning_for_alias_required;
1453 unsigned int npeel = 0;
1454 bool all_misalignments_unknown = true;
1455 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1456 unsigned possible_npeel_number = 1;
1457 tree vectype;
1458 unsigned int nelements, mis, same_align_drs_max = 0;
1460 if (vect_print_dump_info (REPORT_DETAILS))
1461 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1463 /* While cost model enhancements are expected in the future, the high level
1464 view of the code at this time is as follows:
1466 A) If there is a misaligned access then see if peeling to align
1467 this access can make all data references satisfy
1468 vect_supportable_dr_alignment. If so, update data structures
1469 as needed and return true.
1471 B) If peeling wasn't possible and there is a data reference with an
1472 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1473 then see if loop versioning checks can be used to make all data
1474 references satisfy vect_supportable_dr_alignment. If so, update
1475 data structures as needed and return true.
1477 C) If neither peeling nor versioning were successful then return false if
1478 any data reference does not satisfy vect_supportable_dr_alignment.
1480 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1482 Note, Possibility 3 above (which is peeling and versioning together) is not
1483 being done at this time. */
1485 /* (1) Peeling to force alignment. */
1487 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1488 Considerations:
1489 + How many accesses will become aligned due to the peeling
1490 - How many accesses will become unaligned due to the peeling,
1491 and the cost of misaligned accesses.
1492 - The cost of peeling (the extra runtime checks, the increase
1493 in code size). */
1495 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1497 stmt = DR_STMT (dr);
1498 stmt_info = vinfo_for_stmt (stmt);
1500 if (!STMT_VINFO_RELEVANT (stmt_info))
1501 continue;
1503 /* For interleaving, only the alignment of the first access
1504 matters. */
1505 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1506 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1507 continue;
1509 /* For invariant accesses there is nothing to enhance. */
1510 if (integer_zerop (DR_STEP (dr)))
1511 continue;
1513 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1514 do_peeling = vector_alignment_reachable_p (dr);
1515 if (do_peeling)
1517 if (known_alignment_for_access_p (dr))
1519 unsigned int npeel_tmp;
1520 bool negative = tree_int_cst_compare (DR_STEP (dr),
1521 size_zero_node) < 0;
1523 /* Save info about DR in the hash table. */
1524 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1525 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1526 htab_create (1, vect_peeling_hash,
1527 vect_peeling_hash_eq, free);
1529 vectype = STMT_VINFO_VECTYPE (stmt_info);
1530 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1531 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1532 TREE_TYPE (DR_REF (dr))));
1533 npeel_tmp = (negative
1534 ? (mis - nelements) : (nelements - mis))
1535 & (nelements - 1);
              /* For multiple types, it is possible that the bigger type access
                 will have more than one peeling option.  E.g., a loop with two
                 types: one of size (vector size / 4), and the other one of
                 size (vector size / 8).  The vectorization factor will be 8.
                 If both accesses are misaligned by 3, the first one needs one
                 scalar iteration to be aligned, and the second one needs 5.
                 But the first one will also be aligned by peeling 5 scalar
                 iterations, and in that case both accesses will be aligned.
                 Hence, except for the immediate peeling amount, we also want
                 to try to add the full vector size, while we don't exceed the
                 vectorization factor.
                 We do this automatically for the cost model, since we
                 calculate the cost for every peeling option.  */
1550 if (!flag_vect_cost_model)
1551 possible_npeel_number = vf /nelements;
1553 /* Handle the aligned case. We may decide to align some other
1554 access, making DR unaligned. */
1555 if (DR_MISALIGNMENT (dr) == 0)
1557 npeel_tmp = 0;
1558 if (!flag_vect_cost_model)
1559 possible_npeel_number++;
1562 for (j = 0; j < possible_npeel_number; j++)
1564 gcc_assert (npeel_tmp <= vf);
1565 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1566 npeel_tmp += nelements;
1569 all_misalignments_unknown = false;
1570 /* Data-ref that was chosen for the case that all the
1571 misalignments are unknown is not relevant anymore, since we
1572 have a data-ref with known alignment. */
1573 dr0 = NULL;
1575 else
          /* If we don't know all the misalignment values, we prefer peeling
             for the data-ref that has the maximum number of data-refs with
             the same alignment, unless the target prefers to align stores
             over loads.  */
1581 if (all_misalignments_unknown)
1583 if (same_align_drs_max < VEC_length (dr_p,
1584 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1585 || !dr0)
1587 same_align_drs_max = VEC_length (dr_p,
1588 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1589 dr0 = dr;
1592 if (!first_store && DR_IS_WRITE (dr))
1593 first_store = dr;
1596 /* If there are both known and unknown misaligned accesses in the
1597 loop, we choose peeling amount according to the known
1598 accesses. */
1601 if (!supportable_dr_alignment)
1603 dr0 = dr;
1604 if (!first_store && DR_IS_WRITE (dr))
1605 first_store = dr;
1609 else
1611 if (!aligned_access_p (dr))
1613 if (vect_print_dump_info (REPORT_DETAILS))
1614 fprintf (vect_dump, "vector alignment may not be reachable");
1616 break;
1621 vect_versioning_for_alias_required
1622 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1624 /* Temporarily, if versioning for alias is required, we disable peeling
1625 until we support peeling and versioning. Often peeling for alignment
1626 will require peeling for loop-bound, which in turn requires that we
1627 know how to adjust the loop ivs after the loop. */
1628 if (vect_versioning_for_alias_required
1629 || !vect_can_advance_ivs_p (loop_vinfo)
1630 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1631 do_peeling = false;
1633 if (do_peeling && all_misalignments_unknown
1634 && vect_supportable_dr_alignment (dr0, false))
1637 /* Check if the target requires to prefer stores over loads, i.e., if
1638 misaligned stores are more expensive than misaligned loads (taking
1639 drs with same alignment into account). */
1640 if (first_store && DR_IS_READ (dr0))
1642 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1643 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1644 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1645 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1647 vect_get_data_access_cost (dr0, &load_inside_cost,
1648 &load_outside_cost);
1649 vect_get_data_access_cost (first_store, &store_inside_cost,
1650 &store_outside_cost);
1652 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1653 aligning the load DR0). */
1654 load_inside_penalty = store_inside_cost;
1655 load_outside_penalty = store_outside_cost;
1656 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1657 (vinfo_for_stmt (DR_STMT (first_store))),
1658 i, dr);
1659 i++)
1660 if (DR_IS_READ (dr))
1662 load_inside_penalty += load_inside_cost;
1663 load_outside_penalty += load_outside_cost;
1665 else
1667 load_inside_penalty += store_inside_cost;
1668 load_outside_penalty += store_outside_cost;
1671 /* Calculate the penalty for leaving DR0 unaligned (by
1672 aligning the FIRST_STORE). */
1673 store_inside_penalty = load_inside_cost;
1674 store_outside_penalty = load_outside_cost;
1675 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1676 (vinfo_for_stmt (DR_STMT (dr0))),
1677 i, dr);
1678 i++)
1679 if (DR_IS_READ (dr))
1681 store_inside_penalty += load_inside_cost;
1682 store_outside_penalty += load_outside_cost;
1684 else
1686 store_inside_penalty += store_inside_cost;
1687 store_outside_penalty += store_outside_cost;
1690 if (load_inside_penalty > store_inside_penalty
1691 || (load_inside_penalty == store_inside_penalty
1692 && load_outside_penalty > store_outside_penalty))
1693 dr0 = first_store;
1696 /* In case there are only loads with different unknown misalignments, use
1697 peeling only if it may help to align other accesses in the loop. */
1698 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1699 (vinfo_for_stmt (DR_STMT (dr0))))
1700 && vect_supportable_dr_alignment (dr0, false)
1701 != dr_unaligned_supported)
1702 do_peeling = false;
1705 if (do_peeling && !dr0)
1707 /* Peeling is possible, but there is no data access that is not supported
1708 unless aligned. So we try to choose the best possible peeling. */
1710 /* We should get here only if there are drs with known misalignment. */
1711 gcc_assert (!all_misalignments_unknown);
1713 /* Choose the best peeling from the hash table. */
1714 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1715 if (!dr0 || !npeel)
1716 do_peeling = false;
1719 if (do_peeling)
1721 stmt = DR_STMT (dr0);
1722 stmt_info = vinfo_for_stmt (stmt);
1723 vectype = STMT_VINFO_VECTYPE (stmt_info);
1724 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1726 if (known_alignment_for_access_p (dr0))
1728 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1729 size_zero_node) < 0;
1730 if (!npeel)
1732 /* Since it's known at compile time, compute the number of
1733 iterations in the peeled loop (the peeling factor) for use in
1734 updating DR_MISALIGNMENT values. The peeling factor is the
1735 vectorization factor minus the misalignment as an element
1736 count. */
1737 mis = DR_MISALIGNMENT (dr0);
1738 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1739 npeel = ((negative ? mis - nelements : nelements - mis)
1740 & (nelements - 1));
1743 /* For interleaved data access every iteration accesses all the
1744 members of the group, therefore we divide the number of iterations
1745 by the group size. */
1746 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1747 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1748 npeel /= GROUP_SIZE (stmt_info);
1750 if (vect_print_dump_info (REPORT_DETAILS))
1751 fprintf (vect_dump, "Try peeling by %d", npeel);
1754 /* Ensure that all data refs can be vectorized after the peel. */
1755 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1757 int save_misalignment;
1759 if (dr == dr0)
1760 continue;
1762 stmt = DR_STMT (dr);
1763 stmt_info = vinfo_for_stmt (stmt);
1764 /* For interleaving, only the alignment of the first access
1765 matters. */
1766 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1767 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1768 continue;
1770 save_misalignment = DR_MISALIGNMENT (dr);
1771 vect_update_misalignment_for_peel (dr, dr0, npeel);
1772 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1773 SET_DR_MISALIGNMENT (dr, save_misalignment);
1775 if (!supportable_dr_alignment)
1777 do_peeling = false;
1778 break;
1782 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1784 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1785 if (!stat)
1786 do_peeling = false;
1787 else
1788 return stat;
1791 if (do_peeling)
1793 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1794 If the misalignment of DR_i is identical to that of dr0 then set
1795 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1796 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1797 by the peeling factor times the element size of DR_i (MOD the
1798 vectorization factor times the size). Otherwise, the
1799 misalignment of DR_i must be set to unknown. */
1800 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1801 if (dr != dr0)
1802 vect_update_misalignment_for_peel (dr, dr0, npeel);
1804 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1805 if (npeel)
1806 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1807 else
1808 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1809 SET_DR_MISALIGNMENT (dr0, 0);
1810 if (vect_print_dump_info (REPORT_ALIGNMENT))
1811 fprintf (vect_dump, "Alignment of access forced using peeling.");
1813 if (vect_print_dump_info (REPORT_DETAILS))
1814 fprintf (vect_dump, "Peeling for alignment will be applied.");
1816 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1817 gcc_assert (stat);
1818 return stat;
1823 /* (2) Versioning to force alignment. */
1825 /* Try versioning if:
1826 1) flag_tree_vect_loop_version is TRUE
1827 2) optimize loop for speed
1828 3) there is at least one unsupported misaligned data ref with an unknown
1829 misalignment, and
1830 4) all misaligned data refs with a known misalignment are supported, and
1831 5) the number of runtime alignment checks is within reason. */
1833 do_versioning =
1834 flag_tree_vect_loop_version
1835 && optimize_loop_nest_for_speed_p (loop)
1836 && (!loop->inner); /* FORNOW */
1838 if (do_versioning)
1840 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1842 stmt = DR_STMT (dr);
1843 stmt_info = vinfo_for_stmt (stmt);
1845 /* For interleaving, only the alignment of the first access
1846 matters. */
1847 if (aligned_access_p (dr)
1848 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1849 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1850 continue;
1852 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1854 if (!supportable_dr_alignment)
1856 gimple stmt;
1857 int mask;
1858 tree vectype;
1860 if (known_alignment_for_access_p (dr)
1861 || VEC_length (gimple,
1862 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1863 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1865 do_versioning = false;
1866 break;
1869 stmt = DR_STMT (dr);
1870 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1871 gcc_assert (vectype);
1873 /* The rightmost bits of an aligned address must be zeros.
1874 Construct the mask needed for this test. For example,
1875 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1876 mask must be 15 = 0xf. */
1877 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
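/* Editor's note -- an illustrative model of the run-time guard the
   versioned loop will test with this mask (hypothetical names; the real
   condition is built later from LOOP_VINFO_MAY_MISALIGN_STMTS):

       ((addr_of_dr_1 | addr_of_dr_2 | ...) & mask) == 0

   e.g. for V4SI the mask is 15, so every candidate address must have its
   four low bits clear.  */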
1879 /* FORNOW: use the same mask to test all potentially unaligned
1880 references in the loop. The vectorizer currently supports
1881 a single vector size, see the reference to
1882 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1883 vectorization factor is computed. */
1884 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1885 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1886 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1887 VEC_safe_push (gimple, heap,
1888 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1889 DR_STMT (dr));
1893 /* Versioning requires at least one misaligned data reference. */
1894 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1895 do_versioning = false;
1896 else if (!do_versioning)
1897 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1900 if (do_versioning)
1902 VEC(gimple,heap) *may_misalign_stmts
1903 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1904 gimple stmt;
1906 /* It can now be assumed that the data references in the statements
1907 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1908 of the loop being vectorized. */
1909 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1911 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1912 dr = STMT_VINFO_DATA_REF (stmt_info);
1913 SET_DR_MISALIGNMENT (dr, 0);
1914 if (vect_print_dump_info (REPORT_ALIGNMENT))
1915 fprintf (vect_dump, "Alignment of access forced using versioning.");
1918 if (vect_print_dump_info (REPORT_DETAILS))
1919 fprintf (vect_dump, "Versioning for alignment will be applied.");
1921 /* Peeling and versioning can't be done together at this time. */
1922 gcc_assert (! (do_peeling && do_versioning));
1924 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1925 gcc_assert (stat);
1926 return stat;
1929 /* This point is reached if neither peeling nor versioning is being done. */
1930 gcc_assert (! (do_peeling || do_versioning));
1932 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1933 return stat;
1937 /* Function vect_find_same_alignment_drs.
1939 Update group and alignment relations according to the chosen
1940 vectorization factor. */
1942 static void
1943 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1944 loop_vec_info loop_vinfo)
1946 unsigned int i;
1947 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1948 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1949 struct data_reference *dra = DDR_A (ddr);
1950 struct data_reference *drb = DDR_B (ddr);
1951 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1952 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1953 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1954 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1955 lambda_vector dist_v;
1956 unsigned int loop_depth;
1958 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1959 return;
1961 if (dra == drb)
1962 return;
1964 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1965 return;
1967 /* Loop-based vectorization and known data dependence. */
1968 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1969 return;
1971 /* Data-dependence analysis reports a distance vector of zero
1972 for data-references that overlap only in the first iteration
1973 but have steps of different sign (see PR45764).
1974 So as a sanity check require equal DR_STEP. */
1975 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1976 return;
1978 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1979 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1981 int dist = dist_v[loop_depth];
1983 if (vect_print_dump_info (REPORT_DR_DETAILS))
1984 fprintf (vect_dump, "dependence distance = %d.", dist);
1986 /* Same loop iteration. */
1987 if (dist == 0
1988 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1990 /* Two references with distance zero have the same alignment. */
1991 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1992 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1993 if (vect_print_dump_info (REPORT_ALIGNMENT))
1994 fprintf (vect_dump, "accesses have the same alignment.");
1995 if (vect_print_dump_info (REPORT_DR_DETAILS))
1997 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1998 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1999 fprintf (vect_dump, " and ");
2000 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
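/* Editor's worked example (hypothetical loop): with a vectorization factor
   of 4 and 4-byte elements, references a[i] and a[i+4] have dependence
   distance 4; 4 % 4 == 0 and the element sizes match, so each dr is added
   to the other's SAME_ALIGN_REFS.  A distance of 3 would not qualify.  */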
2007 /* Function vect_analyze_data_refs_alignment
2009 Analyze the alignment of the data-references in the loop.
2010 Return FALSE if a data reference is found that cannot be vectorized. */
2012 bool
2013 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2014 bb_vec_info bb_vinfo)
2016 if (vect_print_dump_info (REPORT_DETAILS))
2017 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2019 /* Mark groups of data references with same alignment using
2020 data dependence information. */
2021 if (loop_vinfo)
2023 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2024 struct data_dependence_relation *ddr;
2025 unsigned int i;
2027 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2028 vect_find_same_alignment_drs (ddr, loop_vinfo);
2031 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2033 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2034 fprintf (vect_dump,
2035 "not vectorized: can't calculate alignment for data ref.");
2036 return false;
2039 return true;
2043 /* Analyze groups of strided accesses: check that DR belongs to a group of
2044 strided accesses of legal size, step, etc. Detect gaps, single element
2045 interleaving, and other special cases. Set strided access info.
2046 Collect groups of strided stores for further use in SLP analysis. */
2048 static bool
2049 vect_analyze_group_access (struct data_reference *dr)
2051 tree step = DR_STEP (dr);
2052 tree scalar_type = TREE_TYPE (DR_REF (dr));
2053 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2054 gimple stmt = DR_STMT (dr);
2055 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2056 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2057 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2058 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2059 HOST_WIDE_INT stride, last_accessed_element = 1;
2060 bool slp_impossible = false;
2061 struct loop *loop = NULL;
2063 if (loop_vinfo)
2064 loop = LOOP_VINFO_LOOP (loop_vinfo);
2066 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2067 interleaving group (including gaps). */
2068 stride = dr_step / type_size;
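/* Editor's worked example (hypothetical access): a load of a[2*i] from an
   int array has dr_step = 8 and type_size = 4, giving stride = 2 -- the
   group spans two elements per scalar iteration whether or not a[2*i+1]
   is also accessed; if it is not, that element is a gap.  */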
2070 /* A non-consecutive access is possible only if it is a part of interleaving. */
2071 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2073 /* Check if this DR is a part of interleaving, and is a single
2074 element of the group that is accessed in the loop. */
2076 /* Gaps are supported only for loads. STEP must be a multiple of the type
2077 size. The size of the group must be a power of 2. */
2078 if (DR_IS_READ (dr)
2079 && (dr_step % type_size) == 0
2080 && stride > 0
2081 && exact_log2 (stride) != -1)
2083 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2084 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2085 if (vect_print_dump_info (REPORT_DR_DETAILS))
2087 fprintf (vect_dump, "Detected single element interleaving ");
2088 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2089 fprintf (vect_dump, " step ");
2090 print_generic_expr (vect_dump, step, TDF_SLIM);
2093 if (loop_vinfo)
2095 if (vect_print_dump_info (REPORT_DETAILS))
2096 fprintf (vect_dump, "Data access with gaps requires scalar "
2097 "epilogue loop");
2098 if (loop->inner)
2100 if (vect_print_dump_info (REPORT_DETAILS))
2101 fprintf (vect_dump, "Peeling for outer loop is not"
2102 " supported");
2103 return false;
2106 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2109 return true;
2112 if (vect_print_dump_info (REPORT_DETAILS))
2114 fprintf (vect_dump, "not consecutive access ");
2115 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2118 if (bb_vinfo)
2120 /* Mark the statement as unvectorizable. */
2121 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2122 return true;
2125 return false;
2128 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2130 /* First stmt in the interleaving chain. Check the chain. */
2131 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2132 struct data_reference *data_ref = dr;
2133 unsigned int count = 1;
2134 tree next_step;
2135 tree prev_init = DR_INIT (data_ref);
2136 gimple prev = stmt;
2137 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2139 while (next)
2141 /* Skip same data-refs. In case that two or more stmts share
2142 data-ref (supported only for loads), we vectorize only the first
2143 stmt, and the rest get their vectorized loads from the first
2144 one. */
2145 if (!tree_int_cst_compare (DR_INIT (data_ref),
2146 DR_INIT (STMT_VINFO_DATA_REF (
2147 vinfo_for_stmt (next)))))
2149 if (DR_IS_WRITE (data_ref))
2151 if (vect_print_dump_info (REPORT_DETAILS))
2152 fprintf (vect_dump, "Two store stmts share the same dr.");
2153 return false;
2156 /* Check that there are no load-store dependences for these loads,
2157 to prevent a load-store-load sequence to the same location. */
2158 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2159 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2161 if (vect_print_dump_info (REPORT_DETAILS))
2162 fprintf (vect_dump,
2163 "READ_WRITE dependence in interleaving.");
2164 return false;
2167 /* For load use the same data-ref load. */
2168 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2170 prev = next;
2171 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2172 continue;
2175 prev = next;
2177 /* Check that all the accesses have the same STEP. */
2178 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2179 if (tree_int_cst_compare (step, next_step))
2181 if (vect_print_dump_info (REPORT_DETAILS))
2182 fprintf (vect_dump, "not consecutive access in interleaving");
2183 return false;
2186 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2187 /* Check that the distance between two accesses is equal to the type
2188 size. Otherwise, we have gaps. */
2189 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2190 - TREE_INT_CST_LOW (prev_init)) / type_size;
2191 if (diff != 1)
2193 /* FORNOW: SLP of accesses with gaps is not supported. */
2194 slp_impossible = true;
2195 if (DR_IS_WRITE (data_ref))
2197 if (vect_print_dump_info (REPORT_DETAILS))
2198 fprintf (vect_dump, "interleaved store with gaps");
2199 return false;
2202 gaps += diff - 1;
2205 last_accessed_element += diff;
2207 /* Store the gap from the previous member of the group. If there is no
2208 gap in the access, GROUP_GAP is always 1. */
2209 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2211 prev_init = DR_INIT (data_ref);
2212 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2213 /* Count the number of data-refs in the chain. */
2214 count++;
2217 /* COUNT is the number of accesses found; we multiply it by the size of
2218 the type to get COUNT_IN_BYTES. */
2219 count_in_bytes = type_size * count;
2221 /* Check that the size of the interleaving (including gaps) is not
2222 greater than STEP. */
2223 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2225 if (vect_print_dump_info (REPORT_DETAILS))
2227 fprintf (vect_dump, "interleaving size is greater than step for ");
2228 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2230 return false;
2233 /* Check that the size of the interleaving is equal to STEP for stores,
2234 i.e., that there are no gaps. */
2235 if (dr_step && dr_step != count_in_bytes)
2237 if (DR_IS_READ (dr))
2239 slp_impossible = true;
2240 /* There is a gap after the last load in the group. This gap is the
2241 difference between the stride and the number of elements. When
2242 there is no gap, this difference should be 0. */
2243 GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
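/* Editor's worked example (hypothetical group of loads): a[3*i] and
   a[3*i+1] with 4-byte elements give dr_step = 12, stride = 3, count = 2
   and count_in_bytes = 8, so the last element of the group is a gap and
   GROUP_GAP of the first stmt becomes 3 - 2 = 1.  The same layout in a
   store group is rejected below as an interleaved store with gaps.  */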
2245 else
2247 if (vect_print_dump_info (REPORT_DETAILS))
2248 fprintf (vect_dump, "interleaved store with gaps");
2249 return false;
2253 /* Check that STEP is a multiple of type size. */
2254 if (dr_step && (dr_step % type_size) != 0)
2256 if (vect_print_dump_info (REPORT_DETAILS))
2258 fprintf (vect_dump, "step is not a multiple of type size: step ");
2259 print_generic_expr (vect_dump, step, TDF_SLIM);
2260 fprintf (vect_dump, " size ");
2261 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2262 TDF_SLIM);
2264 return false;
2267 if (stride == 0)
2268 stride = count;
2270 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2271 if (vect_print_dump_info (REPORT_DETAILS))
2272 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2274 /* SLP: create an SLP data structure for every interleaving group of
2275 stores for further analysis in vect_analyze_slp. */
2276 if (DR_IS_WRITE (dr) && !slp_impossible)
2278 if (loop_vinfo)
2279 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2280 stmt);
2281 if (bb_vinfo)
2282 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2283 stmt);
2286 /* There is a gap in the end of the group. */
2287 if (stride - last_accessed_element > 0 && loop_vinfo)
2289 if (vect_print_dump_info (REPORT_DETAILS))
2290 fprintf (vect_dump, "Data access with gaps requires scalar "
2291 "epilogue loop");
2292 if (loop->inner)
2294 if (vect_print_dump_info (REPORT_DETAILS))
2295 fprintf (vect_dump, "Peeling for outer loop is not supported");
2296 return false;
2299 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2303 return true;
2307 /* Analyze the access pattern of the data-reference DR.
2308 In case of non-consecutive accesses call vect_analyze_group_access() to
2309 analyze groups of strided accesses. */
2311 static bool
2312 vect_analyze_data_ref_access (struct data_reference *dr)
2314 tree step = DR_STEP (dr);
2315 tree scalar_type = TREE_TYPE (DR_REF (dr));
2316 gimple stmt = DR_STMT (dr);
2317 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2318 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2319 struct loop *loop = NULL;
2320 HOST_WIDE_INT dr_step;
2322 if (loop_vinfo)
2323 loop = LOOP_VINFO_LOOP (loop_vinfo);
2325 if (loop_vinfo && !step)
2327 if (vect_print_dump_info (REPORT_DETAILS))
2328 fprintf (vect_dump, "bad data-ref access in loop");
2329 return false;
2332 /* Allow invariant loads in loops. */
2333 dr_step = TREE_INT_CST_LOW (step);
2334 if (loop_vinfo && dr_step == 0)
2336 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2337 return DR_IS_READ (dr);
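/* Editor's note (illustrative): a read whose address does not change in
   the loop, e.g. 't = c;' for a loop-invariant c, has dr_step == 0 and is
   accepted here, while a store to an invariant address is rejected
   because DR_IS_READ is false for it.  */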
2340 if (loop && nested_in_vect_loop_p (loop, stmt))
2342 /* Interleaved accesses are not yet supported within outer-loop
2343 vectorization for references in the inner-loop. */
2344 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2346 /* For the rest of the analysis we use the outer-loop step. */
2347 step = STMT_VINFO_DR_STEP (stmt_info);
2348 dr_step = TREE_INT_CST_LOW (step);
2350 if (dr_step == 0)
2352 if (vect_print_dump_info (REPORT_ALIGNMENT))
2353 fprintf (vect_dump, "zero step in outer loop.");
2354 if (DR_IS_READ (dr))
2355 return true;
2356 else
2357 return false;
2361 /* Consecutive? */
2362 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2363 || (dr_step < 0
2364 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2366 /* Mark that it is not interleaving. */
2367 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2368 return true;
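/* Editor's note (illustrative): a 4-byte int access with DR_STEP = 4
   satisfies the first test (step equals the type size); DR_STEP = -4
   satisfies the second and is handled as a consecutive access in reverse.
   A step of 8 falls through to the interleaving analysis below.  */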
2371 if (loop && nested_in_vect_loop_p (loop, stmt))
2373 if (vect_print_dump_info (REPORT_ALIGNMENT))
2374 fprintf (vect_dump, "strided access in outer loop.");
2375 return false;
2378 /* Not consecutive access - check if it's a part of interleaving group. */
2379 return vect_analyze_group_access (dr);
2383 /* Function vect_analyze_data_ref_accesses.
2385 Analyze the access pattern of all the data references in the loop.
2387 FORNOW: the only access pattern that is considered vectorizable is a
2388 simple step 1 (consecutive) access.
2390 FORNOW: handle only arrays and pointer accesses. */
2392 bool
2393 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2395 unsigned int i;
2396 VEC (data_reference_p, heap) *datarefs;
2397 struct data_reference *dr;
2399 if (vect_print_dump_info (REPORT_DETAILS))
2400 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2402 if (loop_vinfo)
2403 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2404 else
2405 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2407 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2408 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2409 && !vect_analyze_data_ref_access (dr))
2411 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2412 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2414 if (bb_vinfo)
2416 /* Mark the statement as not vectorizable. */
2417 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2418 continue;
2420 else
2421 return false;
2424 return true;
2427 /* Function vect_prune_runtime_alias_test_list.
2429 Prune a list of ddrs to be tested at run-time by versioning for alias.
2430 Return FALSE if the resulting list of ddrs is longer than allowed by
2431 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2433 bool
2434 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2436 VEC (ddr_p, heap) * ddrs =
2437 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2438 unsigned i, j;
2440 if (vect_print_dump_info (REPORT_DETAILS))
2441 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2443 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2445 bool found;
2446 ddr_p ddr_i;
2448 ddr_i = VEC_index (ddr_p, ddrs, i);
2449 found = false;
2451 for (j = 0; j < i; j++)
2453 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2455 if (vect_vfa_range_equal (ddr_i, ddr_j))
2457 if (vect_print_dump_info (REPORT_DR_DETAILS))
2459 fprintf (vect_dump, "found equal ranges ");
2460 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2461 fprintf (vect_dump, ", ");
2462 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2463 fprintf (vect_dump, " and ");
2464 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2465 fprintf (vect_dump, ", ");
2466 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2468 found = true;
2469 break;
2473 if (found)
2475 VEC_ordered_remove (ddr_p, ddrs, i);
2476 continue;
2478 i++;
2481 if (VEC_length (ddr_p, ddrs) >
2482 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2484 if (vect_print_dump_info (REPORT_DR_DETAILS))
2486 fprintf (vect_dump,
2487 "disable versioning for alias - max number of generated "
2488 "checks exceeded.");
2491 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2493 return false;
2496 return true;
2499 /* Check whether a non-affine read in stmt is suitable for gather load
2500 and if so, return a builtin decl for that operation. */
2502 tree
2503 vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2504 tree *offp, int *scalep)
2506 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2507 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2508 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2509 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2510 tree offtype = NULL_TREE;
2511 tree decl, base, off;
2512 enum machine_mode pmode;
2513 int punsignedp, pvolatilep;
2515 /* The gather builtins need address of the form
2516 loop_invariant + vector * {1, 2, 4, 8}
2517 or
2518 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2519 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2520 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2521 multiplications and additions in it. To get a vector, we need
2522 a single SSA_NAME that will be defined in the loop and will
2523 contain everything that is not loop invariant and that can be
2524 vectorized. The following code attempts to find such a preexisting
2525 SSA_NAME OFF and put the loop invariants into a tree BASE
2526 that can be gimplified before the loop. */
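/* Editor's note -- an illustrative decomposition (hypothetical source):
   for a read of p[k[i]] with loop-invariant p and k and loop IV i, the
   code below is expected to end up with BASE = p plus any constant byte
   offset, OFF = the SSA_NAME holding k[i] (possibly sign-extended), and
   SCALE = sizeof (*p), which is the loop_invariant + vector * scale shape
   the gather builtins want.  */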
2527 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2528 &pmode, &punsignedp, &pvolatilep, false);
2529 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2531 if (TREE_CODE (base) == MEM_REF)
2533 if (!integer_zerop (TREE_OPERAND (base, 1)))
2535 if (off == NULL_TREE)
2537 double_int moff = mem_ref_offset (base);
2538 off = double_int_to_tree (sizetype, moff);
2540 else
2541 off = size_binop (PLUS_EXPR, off,
2542 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2544 base = TREE_OPERAND (base, 0);
2546 else
2547 base = build_fold_addr_expr (base);
2549 if (off == NULL_TREE)
2550 off = size_zero_node;
2552 /* If BASE is not loop invariant: when OFF is 0, start with just the
2553 constant offset in the loop-invariant BASE and continue with the old
2554 base as OFF; otherwise give up.
2555 We could handle that case by gimplifying the addition of base + off
2556 into some SSA_NAME and use that as off, but for now punt. */
2557 if (!expr_invariant_in_loop_p (loop, base))
2559 if (!integer_zerop (off))
2560 return NULL_TREE;
2561 off = base;
2562 base = size_int (pbitpos / BITS_PER_UNIT);
2564 /* Otherwise put base + constant offset into the loop invariant BASE
2565 and continue with OFF. */
2566 else
2568 base = fold_convert (sizetype, base);
2569 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2572 /* OFF at this point may be either a SSA_NAME or some tree expression
2573 from get_inner_reference. Try to peel off loop invariants from it
2574 into BASE as long as possible. */
2575 STRIP_NOPS (off);
2576 while (offtype == NULL_TREE)
2578 enum tree_code code;
2579 tree op0, op1, add = NULL_TREE;
2581 if (TREE_CODE (off) == SSA_NAME)
2583 gimple def_stmt = SSA_NAME_DEF_STMT (off);
2585 if (expr_invariant_in_loop_p (loop, off))
2586 return NULL_TREE;
2588 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2589 break;
2591 op0 = gimple_assign_rhs1 (def_stmt);
2592 code = gimple_assign_rhs_code (def_stmt);
2593 op1 = gimple_assign_rhs2 (def_stmt);
2595 else
2597 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2598 return NULL_TREE;
2599 code = TREE_CODE (off);
2600 extract_ops_from_tree (off, &code, &op0, &op1);
2602 switch (code)
2604 case POINTER_PLUS_EXPR:
2605 case PLUS_EXPR:
2606 if (expr_invariant_in_loop_p (loop, op0))
2608 add = op0;
2609 off = op1;
2610 do_add:
2611 add = fold_convert (sizetype, add);
2612 if (scale != 1)
2613 add = size_binop (MULT_EXPR, add, size_int (scale));
2614 base = size_binop (PLUS_EXPR, base, add);
2615 continue;
2617 if (expr_invariant_in_loop_p (loop, op1))
2619 add = op1;
2620 off = op0;
2621 goto do_add;
2623 break;
2624 case MINUS_EXPR:
2625 if (expr_invariant_in_loop_p (loop, op1))
2627 add = fold_convert (sizetype, op1);
2628 add = size_binop (MINUS_EXPR, size_zero_node, add);
2629 off = op0;
2630 goto do_add;
2632 break;
2633 case MULT_EXPR:
2634 if (scale == 1 && host_integerp (op1, 0))
2636 scale = tree_low_cst (op1, 0);
2637 off = op0;
2638 continue;
2640 break;
2641 case SSA_NAME:
2642 off = op0;
2643 continue;
2644 CASE_CONVERT:
2645 if (!POINTER_TYPE_P (TREE_TYPE (op0))
2646 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2647 break;
2648 if (TYPE_PRECISION (TREE_TYPE (op0))
2649 == TYPE_PRECISION (TREE_TYPE (off)))
2651 off = op0;
2652 continue;
2654 if (TYPE_PRECISION (TREE_TYPE (op0))
2655 < TYPE_PRECISION (TREE_TYPE (off)))
2657 off = op0;
2658 offtype = TREE_TYPE (off);
2659 STRIP_NOPS (off);
2660 continue;
2662 break;
2663 default:
2664 break;
2666 break;
2669 /* If at the end OFF still isn't a SSA_NAME or isn't
2670 defined in the loop, punt. */
2671 if (TREE_CODE (off) != SSA_NAME
2672 || expr_invariant_in_loop_p (loop, off))
2673 return NULL_TREE;
2675 if (offtype == NULL_TREE)
2676 offtype = TREE_TYPE (off);
2678 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2679 offtype, scale);
2680 if (decl == NULL_TREE)
2681 return NULL_TREE;
2683 if (basep)
2684 *basep = base;
2685 if (offp)
2686 *offp = off;
2687 if (scalep)
2688 *scalep = scale;
2689 return decl;
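/* Editor's note -- a standalone scalar model of what the selected gather
   builtin computes from the BASE, OFF and SCALE produced above.  Names
   and types are hypothetical; this is only a sketch of the semantics,
   kept out of the build on purpose.  */
#if 0
static void
gather_model (const char *base, const long *off, long scale,
              int *out, int nlanes)
{
  int l;
  for (l = 0; l < nlanes; l++)
    /* Each lane loads from BASE + OFF[l] * SCALE.  */
    out[l] = *(const int *) (base + off[l] * scale);
}
#endif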
2693 /* Function vect_analyze_data_refs.
2695 Find all the data references in the loop or basic block.
2697 The general structure of the analysis of data refs in the vectorizer is as
2698 follows:
2699 1- vect_analyze_data_refs(loop/bb): call
2700 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2701 in the loop/bb and their dependences.
2702 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2703 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2704 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2708 bool
2709 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2710 bb_vec_info bb_vinfo,
2711 int *min_vf)
2713 struct loop *loop = NULL;
2714 basic_block bb = NULL;
2715 unsigned int i;
2716 VEC (data_reference_p, heap) *datarefs;
2717 struct data_reference *dr;
2718 tree scalar_type;
2719 bool res, stop_bb_analysis = false;
2721 if (vect_print_dump_info (REPORT_DETAILS))
2722 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2724 if (loop_vinfo)
2726 loop = LOOP_VINFO_LOOP (loop_vinfo);
2727 res = compute_data_dependences_for_loop
2728 (loop, true,
2729 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2730 &LOOP_VINFO_DATAREFS (loop_vinfo),
2731 &LOOP_VINFO_DDRS (loop_vinfo));
2733 if (!res)
2735 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2736 fprintf (vect_dump, "not vectorized: loop contains function calls"
2737 " or data references that cannot be analyzed");
2738 return false;
2741 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2743 else
2745 bb = BB_VINFO_BB (bb_vinfo);
2746 res = compute_data_dependences_for_bb (bb, true,
2747 &BB_VINFO_DATAREFS (bb_vinfo),
2748 &BB_VINFO_DDRS (bb_vinfo));
2749 if (!res)
2751 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2752 fprintf (vect_dump, "not vectorized: basic block contains function"
2753 " calls or data references that cannot be analyzed");
2754 return false;
2757 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2760 /* Go through the data-refs, check that the analysis succeeded. Update
2761 pointer from stmt_vec_info struct to DR and vectype. */
2763 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2765 gimple stmt;
2766 stmt_vec_info stmt_info;
2767 tree base, offset, init;
2768 bool gather = false;
2769 int vf;
2771 if (!dr || !DR_REF (dr))
2773 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2774 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2776 return false;
2779 stmt = DR_STMT (dr);
2780 stmt_info = vinfo_for_stmt (stmt);
2782 if (stop_bb_analysis)
2784 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2785 continue;
2788 /* Check that analysis of the data-ref succeeded. */
2789 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2790 || !DR_STEP (dr))
2792 /* If the target supports vector gather loads, see whether one
2793 can be used. */
2794 if (loop_vinfo
2795 && DR_IS_READ (dr)
2796 && !TREE_THIS_VOLATILE (DR_REF (dr))
2797 && targetm.vectorize.builtin_gather != NULL
2798 && !nested_in_vect_loop_p (loop, stmt))
2800 struct data_reference *newdr
2801 = create_data_ref (NULL, loop_containing_stmt (stmt),
2802 DR_REF (dr), stmt, true);
2803 gcc_assert (newdr != NULL && DR_REF (newdr));
2804 if (DR_BASE_ADDRESS (newdr)
2805 && DR_OFFSET (newdr)
2806 && DR_INIT (newdr)
2807 && DR_STEP (newdr)
2808 && integer_zerop (DR_STEP (newdr)))
2810 dr = newdr;
2811 gather = true;
2813 else
2814 free_data_ref (newdr);
2817 if (!gather)
2819 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2821 fprintf (vect_dump, "not vectorized: data ref analysis "
2822 "failed ");
2823 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2826 if (bb_vinfo)
2828 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2829 stop_bb_analysis = true;
2830 continue;
2833 return false;
2837 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2839 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2840 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2841 "constant");
2843 if (bb_vinfo)
2845 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2846 stop_bb_analysis = true;
2847 continue;
2850 if (gather)
2851 free_data_ref (dr);
2852 return false;
2855 if (TREE_THIS_VOLATILE (DR_REF (dr)))
2857 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2859 fprintf (vect_dump, "not vectorized: volatile type ");
2860 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2863 if (bb_vinfo)
2865 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2866 stop_bb_analysis = true;
2867 continue;
2870 return false;
2873 base = unshare_expr (DR_BASE_ADDRESS (dr));
2874 offset = unshare_expr (DR_OFFSET (dr));
2875 init = unshare_expr (DR_INIT (dr));
2877 if (stmt_can_throw_internal (stmt))
2879 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2881 fprintf (vect_dump, "not vectorized: statement can throw an "
2882 "exception ");
2883 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2886 if (bb_vinfo)
2888 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2889 stop_bb_analysis = true;
2890 continue;
2893 if (gather)
2894 free_data_ref (dr);
2895 return false;
2898 if (is_gimple_call (stmt))
2900 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2902 fprintf (vect_dump, "not vectorized: dr in a call ");
2903 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2906 if (bb_vinfo)
2908 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2909 stop_bb_analysis = true;
2910 continue;
2913 if (gather)
2914 free_data_ref (dr);
2915 return false;
2918 /* Update DR field in stmt_vec_info struct. */
2920 /* If the dataref is in an inner-loop of the loop that is considered
2921 for vectorization, we also want to analyze the access relative to
2922 the outer-loop (DR contains information only relative to the
2923 inner-most enclosing loop). We do that by building a reference to the
2924 first location accessed by the inner-loop, and analyze it relative to
2925 the outer-loop. */
2926 if (loop && nested_in_vect_loop_p (loop, stmt))
2928 tree outer_step, outer_base, outer_init;
2929 HOST_WIDE_INT pbitsize, pbitpos;
2930 tree poffset;
2931 enum machine_mode pmode;
2932 int punsignedp, pvolatilep;
2933 affine_iv base_iv, offset_iv;
2934 tree dinit;
2936 /* Build a reference to the first location accessed by the
2937 inner-loop: *(BASE+INIT). (The first location is actually
2938 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2939 tree inner_base = build_fold_indirect_ref
2940 (fold_build_pointer_plus (base, init));
2942 if (vect_print_dump_info (REPORT_DETAILS))
2944 fprintf (vect_dump, "analyze in outer-loop: ");
2945 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2948 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2949 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2950 gcc_assert (outer_base != NULL_TREE);
2952 if (pbitpos % BITS_PER_UNIT != 0)
2954 if (vect_print_dump_info (REPORT_DETAILS))
2955 fprintf (vect_dump, "failed: bit offset alignment.\n");
2956 return false;
2959 outer_base = build_fold_addr_expr (outer_base);
2960 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2961 &base_iv, false))
2963 if (vect_print_dump_info (REPORT_DETAILS))
2964 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2965 return false;
2968 if (offset)
2970 if (poffset)
2971 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2972 poffset);
2973 else
2974 poffset = offset;
2977 if (!poffset)
2979 offset_iv.base = ssize_int (0);
2980 offset_iv.step = ssize_int (0);
2982 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2983 &offset_iv, false))
2985 if (vect_print_dump_info (REPORT_DETAILS))
2986 fprintf (vect_dump, "evolution of offset is not affine.\n");
2987 return false;
2990 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2991 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2992 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2993 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2994 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2996 outer_step = size_binop (PLUS_EXPR,
2997 fold_convert (ssizetype, base_iv.step),
2998 fold_convert (ssizetype, offset_iv.step));
3000 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3001 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3002 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3003 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3004 STMT_VINFO_DR_OFFSET (stmt_info) =
3005 fold_convert (ssizetype, offset_iv.base);
3006 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3007 size_int (highest_pow2_factor (offset_iv.base));
3009 if (vect_print_dump_info (REPORT_DETAILS))
3011 fprintf (vect_dump, "\touter base_address: ");
3012 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3013 fprintf (vect_dump, "\n\touter offset from base address: ");
3014 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3015 fprintf (vect_dump, "\n\touter constant offset from base address: ");
3016 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3017 fprintf (vect_dump, "\n\touter step: ");
3018 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3019 fprintf (vect_dump, "\n\touter aligned to: ");
3020 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3024 if (STMT_VINFO_DATA_REF (stmt_info))
3026 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3028 fprintf (vect_dump,
3029 "not vectorized: more than one data ref in stmt: ");
3030 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3033 if (bb_vinfo)
3035 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3036 stop_bb_analysis = true;
3037 continue;
3040 if (gather)
3041 free_data_ref (dr);
3042 return false;
3045 STMT_VINFO_DATA_REF (stmt_info) = dr;
3047 /* Set vectype for STMT. */
3048 scalar_type = TREE_TYPE (DR_REF (dr));
3049 STMT_VINFO_VECTYPE (stmt_info) =
3050 get_vectype_for_scalar_type (scalar_type);
3051 if (!STMT_VINFO_VECTYPE (stmt_info))
3053 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3055 fprintf (vect_dump,
3056 "not vectorized: no vectype for stmt: ");
3057 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3058 fprintf (vect_dump, " scalar_type: ");
3059 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3062 if (bb_vinfo)
3064 /* Mark the statement as not vectorizable. */
3065 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3066 stop_bb_analysis = true;
3067 continue;
3070 if (gather)
3072 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3073 free_data_ref (dr);
3075 return false;
3078 /* Adjust the minimal vectorization factor according to the
3079 vector type. */
3080 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3081 if (vf > *min_vf)
3082 *min_vf = vf;
3084 if (gather)
3086 unsigned int j, k, n;
3087 struct data_reference *olddr
3088 = VEC_index (data_reference_p, datarefs, i);
3089 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3090 struct data_dependence_relation *ddr, *newddr;
3091 bool bad = false;
3092 tree off;
3093 VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3095 if (!vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL)
3096 || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3098 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3100 fprintf (vect_dump,
3101 "not vectorized: not suitable for gather ");
3102 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3104 return false;
3107 n = VEC_length (data_reference_p, datarefs) - 1;
3108 for (j = 0, k = i - 1; j < i; j++)
3110 ddr = VEC_index (ddr_p, ddrs, k);
3111 gcc_assert (DDR_B (ddr) == olddr);
3112 newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3113 nest);
3114 VEC_replace (ddr_p, ddrs, k, newddr);
3115 free_dependence_relation (ddr);
3116 if (!bad
3117 && DR_IS_WRITE (DDR_A (newddr))
3118 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3119 bad = true;
3120 k += --n;
3123 k++;
3124 n = k + VEC_length (data_reference_p, datarefs) - i - 1;
3125 for (; k < n; k++)
3127 ddr = VEC_index (ddr_p, ddrs, k);
3128 gcc_assert (DDR_A (ddr) == olddr);
3129 newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3130 nest);
3131 VEC_replace (ddr_p, ddrs, k, newddr);
3132 free_dependence_relation (ddr);
3133 if (!bad
3134 && DR_IS_WRITE (DDR_B (newddr))
3135 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3136 bad = true;
3139 k = VEC_length (ddr_p, ddrs)
3140 - VEC_length (data_reference_p, datarefs) + i;
3141 ddr = VEC_index (ddr_p, ddrs, k);
3142 gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3143 newddr = initialize_data_dependence_relation (dr, dr, nest);
3144 VEC_replace (ddr_p, ddrs, k, newddr);
3145 free_dependence_relation (ddr);
3146 VEC_replace (data_reference_p, datarefs, i, dr);
3148 if (bad)
3150 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3152 fprintf (vect_dump,
3153 "not vectorized: data dependence conflict"
3154 " prevents gather");
3155 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3157 return false;
3160 STMT_VINFO_GATHER_P (stmt_info) = true;
3164 return true;
3168 /* Function vect_get_new_vect_var.
3170 Returns a name for a new variable. The current naming scheme prepends
3171 the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
3172 VAR_KIND) to NAME, when provided, to name vectorizer-generated
3173 variables. */
3175 tree
3176 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3178 const char *prefix;
3179 tree new_vect_var;
3181 switch (var_kind)
3183 case vect_simple_var:
3184 prefix = "vect_";
3185 break;
3186 case vect_scalar_var:
3187 prefix = "stmp_";
3188 break;
3189 case vect_pointer_var:
3190 prefix = "vect_p";
3191 break;
3192 default:
3193 gcc_unreachable ();
3196 if (name)
3198 char* tmp = concat (prefix, name, NULL);
3199 new_vect_var = create_tmp_var (type, tmp);
3200 free (tmp);
3202 else
3203 new_vect_var = create_tmp_var (type, prefix);
3205 /* Mark vector typed variable as a gimple register variable. */
3206 if (TREE_CODE (type) == VECTOR_TYPE)
3207 DECL_GIMPLE_REG_P (new_vect_var) = true;
3209 return new_vect_var;
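/* Editor's note (illustrative): given NAME "x", vect_simple_var yields a
   temporary named after "vect_x", vect_scalar_var after "stmp_x" and
   vect_pointer_var after "vect_px"; with a NULL NAME only the prefix is
   used.  Vector-typed results are additionally marked as gimple
   registers.  */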
3213 /* Function vect_create_addr_base_for_vector_ref.
3215 Create an expression that computes the address of the first memory location
3216 that will be accessed for a data reference.
3218 Input:
3219 STMT: The statement containing the data reference.
3220 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3221 OFFSET: Optional. If supplied, it is to be added to the initial address.
3222 LOOP: Specify relative to which loop-nest the address should be computed.
3223 For example, when the dataref is in an inner-loop nested in an
3224 outer-loop that is now being vectorized, LOOP can be either the
3225 outer-loop, or the inner-loop. The first memory location accessed
3226 by the following dataref ('in' points to short):
3228 for (i=0; i<N; i++)
3229 for (j=0; j<M; j++)
3230 s += in[i+j]
3232 is as follows:
3233 if LOOP=i_loop: &in (relative to i_loop)
3234 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3236 Output:
3237 1. Return an SSA_NAME whose value is the address of the memory location of
3238 the first vector of the data reference.
3239 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3240 these statement(s) which define the returned SSA_NAME.
3242 FORNOW: We are only handling array accesses with step 1. */
3244 tree
3245 vect_create_addr_base_for_vector_ref (gimple stmt,
3246 gimple_seq *new_stmt_list,
3247 tree offset,
3248 struct loop *loop)
3250 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3251 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3252 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3253 tree base_name;
3254 tree data_ref_base_var;
3255 tree vec_stmt;
3256 tree addr_base, addr_expr;
3257 tree dest;
3258 gimple_seq seq = NULL;
3259 tree base_offset = unshare_expr (DR_OFFSET (dr));
3260 tree init = unshare_expr (DR_INIT (dr));
3261 tree vect_ptr_type;
3262 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
3263 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3264 tree base;
3266 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
3268 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
3270 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
3272 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3273 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3274 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3277 if (loop_vinfo)
3278 base_name = build_fold_indirect_ref (data_ref_base);
3279 else
3281 base_offset = ssize_int (0);
3282 init = ssize_int (0);
3283 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
3286 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3287 add_referenced_var (data_ref_base_var);
3288 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3289 data_ref_base_var);
3290 gimple_seq_add_seq (new_stmt_list, seq);
3292 /* Create base_offset */
3293 base_offset = size_binop (PLUS_EXPR,
3294 fold_convert (sizetype, base_offset),
3295 fold_convert (sizetype, init));
3296 dest = create_tmp_var (sizetype, "base_off");
3297 add_referenced_var (dest);
3298 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3299 gimple_seq_add_seq (new_stmt_list, seq);
3301 if (offset)
3303 tree tmp = create_tmp_var (sizetype, "offset");
3305 add_referenced_var (tmp);
3306 offset = fold_build2 (MULT_EXPR, sizetype,
3307 fold_convert (sizetype, offset), step);
3308 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3309 base_offset, offset);
3310 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3311 gimple_seq_add_seq (new_stmt_list, seq);
3314 /* base + base_offset */
3315 if (loop_vinfo)
3316 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
3317 else
3319 addr_base = build1 (ADDR_EXPR,
3320 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3321 unshare_expr (DR_REF (dr)));
3324 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
3325 base = get_base_address (DR_REF (dr));
3326 if (base
3327 && TREE_CODE (base) == MEM_REF)
3328 vect_ptr_type
3329 = build_qualified_type (vect_ptr_type,
3330 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3332 vec_stmt = fold_convert (vect_ptr_type, addr_base);
3333 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3334 get_name (base_name));
3335 add_referenced_var (addr_expr);
3336 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
3337 gimple_seq_add_seq (new_stmt_list, seq);
3339 if (DR_PTR_INFO (dr)
3340 && TREE_CODE (vec_stmt) == SSA_NAME)
3342 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3343 if (offset)
3345 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
3346 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
3350 if (vect_print_dump_info (REPORT_DETAILS))
3352 fprintf (vect_dump, "created ");
3353 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
3356 return vec_stmt;
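/* Editor's note -- a minimal scalar model of the address built above for
   the loop case, assuming the DR fields are plain byte quantities (names
   here are hypothetical; the basic-block case instead takes the address
   of DR_REF directly).  Kept out of the build on purpose.  */
#if 0
static char *
addr_base_model (char *dr_base_address, unsigned long dr_offset,
                 unsigned long dr_init, unsigned long offset_in_elements,
                 unsigned long element_size)
{
  /* base_offset = DR_OFFSET + DR_INIT (+ OFFSET * element size).  */
  unsigned long base_offset
    = dr_offset + dr_init + offset_in_elements * element_size;
  return dr_base_address + base_offset;
}
#endif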
3360 /* Function vect_create_data_ref_ptr.
3362 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3363 location accessed in the loop by STMT, along with the def-use update
3364 chain to appropriately advance the pointer through the loop iterations.
3365 Also set aliasing information for the pointer. This pointer is used by
3366 the callers to this function to create a memory reference expression for
3367 vector load/store access.
3369 Input:
3370 1. STMT: a stmt that references memory. Expected to be of the form
3371 GIMPLE_ASSIGN <name, data-ref> or
3372 GIMPLE_ASSIGN <data-ref, name>.
3373 2. AGGR_TYPE: the type of the reference, which should be either a vector
3374 or an array.
3375 3. AT_LOOP: the loop where the vector memref is to be created.
3376 4. OFFSET (optional): an offset to be added to the initial address accessed
3377 by the data-ref in STMT.
3378 5. BSI: location where the new stmts are to be placed if there is no loop
3379 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3380 pointing to the initial address.
3382 Output:
3383 1. Declare a new ptr to vector_type, and have it point to the base of the
3384 data reference (initial address accessed by the data reference).
3385 For example, for vector of type V8HI, the following code is generated:
3387 v8hi *ap;
3388 ap = (v8hi *)initial_address;
3390 if OFFSET is not supplied:
3391 initial_address = &a[init];
3392 if OFFSET is supplied:
3393 initial_address = &a[init + OFFSET];
3395 Return the initial_address in INITIAL_ADDRESS.
3397 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3398 update the pointer in each iteration of the loop.
3400 Return the increment stmt that updates the pointer in PTR_INCR.
3402 3. Set INV_P to true if the access pattern of the data reference in the
3403 vectorized loop is invariant. Set it to false otherwise.
3405 4. Return the pointer. */
3407 tree
3408 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3409 tree offset, tree *initial_address,
3410 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3411 bool only_init, bool *inv_p)
3413 tree base_name;
3414 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3415 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3416 struct loop *loop = NULL;
3417 bool nested_in_vect_loop = false;
3418 struct loop *containing_loop = NULL;
3419 tree aggr_ptr_type;
3420 tree aggr_ptr;
3421 tree new_temp;
3422 gimple vec_stmt;
3423 gimple_seq new_stmt_list = NULL;
3424 edge pe = NULL;
3425 basic_block new_bb;
3426 tree aggr_ptr_init;
3427 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3428 tree aptr;
3429 gimple_stmt_iterator incr_gsi;
3430 bool insert_after;
3431 bool negative;
3432 tree indx_before_incr, indx_after_incr;
3433 gimple incr;
3434 tree step;
3435 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3436 tree base;
3438 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3439 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3441 if (loop_vinfo)
3443 loop = LOOP_VINFO_LOOP (loop_vinfo);
3444 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3445 containing_loop = (gimple_bb (stmt))->loop_father;
3446 pe = loop_preheader_edge (loop);
3448 else
3450 gcc_assert (bb_vinfo);
3451 only_init = true;
3452 *ptr_incr = NULL;
3455 /* Check the step (evolution) of the load in LOOP, and record
3456 whether it's invariant. */
3457 if (nested_in_vect_loop)
3458 step = STMT_VINFO_DR_STEP (stmt_info);
3459 else
3460 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3462 if (tree_int_cst_compare (step, size_zero_node) == 0)
3463 *inv_p = true;
3464 else
3465 *inv_p = false;
3466 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3468 /* Create an expression for the first address accessed by this load
3469 in LOOP. */
3470 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3472 if (vect_print_dump_info (REPORT_DETAILS))
3474 tree data_ref_base = base_name;
3475 fprintf (vect_dump, "create %s-pointer variable to type: ",
3476 tree_code_name[(int) TREE_CODE (aggr_type)]);
3477 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3478 if (TREE_CODE (data_ref_base) == VAR_DECL
3479 || TREE_CODE (data_ref_base) == ARRAY_REF)
3480 fprintf (vect_dump, " vectorizing an array ref: ");
3481 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3482 fprintf (vect_dump, " vectorizing a record based array ref: ");
3483 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3484 fprintf (vect_dump, " vectorizing a pointer ref: ");
3485 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3488 /* (1) Create the new aggregate-pointer variable. */
3489 aggr_ptr_type = build_pointer_type (aggr_type);
3490 base = get_base_address (DR_REF (dr));
3491 if (base
3492 && TREE_CODE (base) == MEM_REF)
3493 aggr_ptr_type
3494 = build_qualified_type (aggr_ptr_type,
3495 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3496 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3497 get_name (base_name));
3499 /* Vector and array types inherit the alias set of their component
3500 type by default so we need to use a ref-all pointer if the data
3501 reference does not conflict with the created aggregated data
3502 reference because it is not addressable. */
3503 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3504 get_alias_set (DR_REF (dr))))
3506 aggr_ptr_type
3507 = build_pointer_type_for_mode (aggr_type,
3508 TYPE_MODE (aggr_ptr_type), true);
3509 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3510 get_name (base_name));
3513 /* Likewise for any of the data references in the stmt group. */
3514 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3516 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3519 tree lhs = gimple_assign_lhs (orig_stmt);
3520 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3521 get_alias_set (lhs)))
3523 aggr_ptr_type
3524 = build_pointer_type_for_mode (aggr_type,
3525 TYPE_MODE (aggr_ptr_type), true);
3526 aggr_ptr
3527 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3528 get_name (base_name));
3529 break;
3532 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3534 while (orig_stmt);
3537 add_referenced_var (aggr_ptr);
3539 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3540 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3541 def-use update cycles for the pointer: one relative to the outer-loop
3542 (LOOP), which is what steps (3) and (4) below do. The other is relative
3543 to the inner-loop (which is the inner-most loop containing the dataref),
3544 and this is done by step (5) below.
3546 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3547 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3548 redundant. Steps (3),(4) create the following:
3550 vp0 = &base_addr;
3551 LOOP: vp1 = phi(vp0,vp2)
3554 vp2 = vp1 + step
3555 goto LOOP
3557 If there is an inner-loop nested in loop, then step (5) will also be
3558 applied, and an additional update in the inner-loop will be created:
3560 vp0 = &base_addr;
3561 LOOP: vp1 = phi(vp0,vp2)
3563 inner: vp3 = phi(vp1,vp4)
3564 vp4 = vp3 + inner_step
3565 if () goto inner
3567 vp2 = vp1 + step
3568 if () goto LOOP */
3570 /* (2) Calculate the initial address of the aggregate-pointer, and set
3571 the aggregate-pointer to point to it before the loop. */
3573 /* Create: &(base[init_val+offset]) in the loop preheader. */
3575 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3576 offset, loop);
3577 if (new_stmt_list)
3579 if (pe)
3581 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3582 gcc_assert (!new_bb);
3584 else
3585 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3588 *initial_address = new_temp;
3590 /* Create: p = (aggr_type *) initial_base */
3591 if (TREE_CODE (new_temp) != SSA_NAME
3592 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3594 vec_stmt = gimple_build_assign (aggr_ptr,
3595 fold_convert (aggr_ptr_type, new_temp));
3596 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3597 /* Copy the points-to information if it exists. */
3598 if (DR_PTR_INFO (dr))
3599 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3600 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3601 if (pe)
3603 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3604 gcc_assert (!new_bb);
3606 else
3607 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3609 else
3610 aggr_ptr_init = new_temp;
3612 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3613 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3614 inner-loop nested in LOOP (during outer-loop vectorization). */
3616 /* No update in loop is required. */
3617 if (only_init && (!loop_vinfo || at_loop == loop))
3618 aptr = aggr_ptr_init;
3619 else
3621 /* The step of the aggregate pointer is the type size. */
3622 tree step = TYPE_SIZE_UNIT (aggr_type);
3623 /* One exception to the above is when the scalar step of the load in
3624 LOOP is zero. In this case the step here is also zero. */
3625 if (*inv_p)
3626 step = size_zero_node;
3627 else if (negative)
3628 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3630 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3632 create_iv (aggr_ptr_init,
3633 fold_convert (aggr_ptr_type, step),
3634 aggr_ptr, loop, &incr_gsi, insert_after,
3635 &indx_before_incr, &indx_after_incr);
3636 incr = gsi_stmt (incr_gsi);
3637 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3639 /* Copy the points-to information if it exists. */
3640 if (DR_PTR_INFO (dr))
3642 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3643 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3645 if (ptr_incr)
3646 *ptr_incr = incr;
3648 aptr = indx_before_incr;
3651 if (!nested_in_vect_loop || only_init)
3652 return aptr;
3655 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3656 nested in LOOP, if it exists. */
3658 gcc_assert (nested_in_vect_loop);
3659 if (!only_init)
3661 standard_iv_increment_position (containing_loop, &incr_gsi,
3662 &insert_after);
3663 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3664 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3665 &indx_after_incr);
3666 incr = gsi_stmt (incr_gsi);
3667 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3669 /* Copy the points-to information if it exists. */
3670 if (DR_PTR_INFO (dr))
3672 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3673 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3675 if (ptr_incr)
3676 *ptr_incr = incr;
3678 return indx_before_incr;
3680 else
3681 gcc_unreachable ();
3685 /* Function bump_vector_ptr
3687 Increment a pointer (to a vector type) by vector-size. If requested,
3688 i.e. if PTR_INCR is given, then also connect the new increment stmt
3689 to the existing def-use update-chain of the pointer, by modifying
3690 the PTR_INCR as illustrated below:
3692 The pointer def-use update-chain before this function:
3693 DATAREF_PTR = phi (p_0, p_2)
3694 ....
3695 PTR_INCR: p_2 = DATAREF_PTR + step
3697 The pointer def-use update-chain after this function:
3698 DATAREF_PTR = phi (p_0, p_2)
3699 ....
3700 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3701 ....
3702 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3704 Input:
3705 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3706 in the loop.
3707 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3708 the loop. The increment amount across iterations is expected
3709 to be vector_size.
3710 GSI - location where the new update stmt is to be placed.
3711 STMT - the original scalar memory-access stmt that is being vectorized.
3712 BUMP - optional. The offset by which to bump the pointer. If not given,
3713 the offset is assumed to be vector_size.
3715 Output: Return NEW_DATAREF_PTR as illustrated above.
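 A typical use looks roughly as follows (an illustrative sketch; the vector
 variables are hypothetical, only the call form follows the signature below):

     vec1 = *dataref_ptr;
     dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
     vec2 = *dataref_ptr;

 Callers that need several vector accesses per scalar group bump the pointer
 between them; passing NULL_TREE as BUMP uses the vector size.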
3719 tree
3720 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3721 gimple stmt, tree bump)
3723 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3724 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3725 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3726 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3727 tree update = TYPE_SIZE_UNIT (vectype);
3728 gimple incr_stmt;
3729 ssa_op_iter iter;
3730 use_operand_p use_p;
3731 tree new_dataref_ptr;
3733 if (bump)
3734 update = bump;
3736 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3737 dataref_ptr, update);
3738 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3739 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3740 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3742 /* Copy the points-to information if it exists. */
3743 if (DR_PTR_INFO (dr))
3745 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3746 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3747 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3750 if (!ptr_incr)
3751 return new_dataref_ptr;
3753 /* Update the vector-pointer's cross-iteration increment. */
3754 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3756 tree use = USE_FROM_PTR (use_p);
3758 if (use == dataref_ptr)
3759 SET_USE (use_p, new_dataref_ptr);
3760 else
3761 gcc_assert (tree_int_cst_compare (use, update) == 0);
3764 return new_dataref_ptr;
3768 /* Function vect_create_destination_var.
3770 Create a new temporary of type VECTYPE. */
3772 tree
3773 vect_create_destination_var (tree scalar_dest, tree vectype)
3775 tree vec_dest;
3776 const char *new_name;
3777 tree type;
3778 enum vect_var_kind kind;
3780 kind = vectype ? vect_simple_var : vect_scalar_var;
3781 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3783 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3785 new_name = get_name (scalar_dest);
3786 if (!new_name)
3787 new_name = "var_";
3788 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3789 add_referenced_var (vec_dest);
3791 return vec_dest;
3794 /* Function vect_strided_store_supported.
3796 Returns TRUE if interleave high and interleave low permutations
3797 are supported, and FALSE otherwise. */
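 /* For example (illustrative): with 8 elements per vector, the two masks
    checked below are
        interleave high:  { 0, 8, 1, 9, 2, 10, 3, 11 }
        interleave low:   { 4, 12, 5, 13, 6, 14, 7, 15 }
    each selecting from the concatenation of the two input vectors.  */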
3799 bool
3800 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3802 enum machine_mode mode = TYPE_MODE (vectype);
3804 /* vect_permute_store_chain requires the group size to be a power of two. */
3805 if (exact_log2 (count) == -1)
3807 if (vect_print_dump_info (REPORT_DETAILS))
3808 fprintf (vect_dump, "the size of the group of strided accesses"
3809 " is not a power of 2");
3810 return false;
3813 /* Check that the permutation is supported. */
3814 if (VECTOR_MODE_P (mode))
3816 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3817 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3818 for (i = 0; i < nelt / 2; i++)
3820 sel[i * 2] = i;
3821 sel[i * 2 + 1] = i + nelt;
3823 if (can_vec_perm_p (mode, false, sel))
3825 for (i = 0; i < nelt; i++)
3826 sel[i] += nelt / 2;
3827 if (can_vec_perm_p (mode, false, sel))
3828 return true;
3832 if (vect_print_dump_info (REPORT_DETAILS))
3833 fprintf (vect_dump, "interleave op not supported by target.");
3834 return false;
3838 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3839 type VECTYPE. */
3841 bool
3842 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3844 return vect_lanes_optab_supported_p ("vec_store_lanes",
3845 vec_store_lanes_optab,
3846 vectype, count);
3850 /* Function vect_permute_store_chain.
3852 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3853 a power of 2, generate interleave_high/low stmts to reorder the data
3854 correctly for the stores. Return the final references for stores in
3855 RESULT_CHAIN.
3857 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3858 The input is 4 vectors each containing 8 elements. We assign a number to
3859 each element; the input sequence is:
3861 1st vec: 0 1 2 3 4 5 6 7
3862 2nd vec: 8 9 10 11 12 13 14 15
3863 3rd vec: 16 17 18 19 20 21 22 23
3864 4th vec: 24 25 26 27 28 29 30 31
3866 The output sequence should be:
3868 1st vec: 0 8 16 24 1 9 17 25
3869 2nd vec: 2 10 18 26 3 11 19 27
3870 3rd vec: 4 12 20 28 5 13 21 29
3871 4th vec: 6 14 22 30 7 15 23 31
3873 i.e., we interleave the contents of the four vectors in their order.
3875 We use interleave_high/low instructions to create such output. The input of
3876 each interleave_high/low operation is two vectors:
3877 1st vec 2nd vec
3878 0 1 2 3 4 5 6 7
3879 the even elements of the result vector are obtained left-to-right from the
3880 high/low elements of the first vector. The odd elements of the result are
3881 obtained left-to-right from the high/low elements of the second vector.
3882 The output of interleave_high will be: 0 4 1 5
3883 and of interleave_low: 2 6 3 7
3886 The permutation is done in log LENGTH stages. In each stage interleave_high
3887 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3888 where the first argument is taken from the first half of DR_CHAIN and the
3889 second argument from its second half.
3890 In our example,
3892 I1: interleave_high (1st vec, 3rd vec)
3893 I2: interleave_low (1st vec, 3rd vec)
3894 I3: interleave_high (2nd vec, 4th vec)
3895 I4: interleave_low (2nd vec, 4th vec)
3897 The output for the first stage is:
3899 I1: 0 16 1 17 2 18 3 19
3900 I2: 4 20 5 21 6 22 7 23
3901 I3: 8 24 9 25 10 26 11 27
3902 I4: 12 28 13 29 14 30 15 31
3904 The output of the second stage, i.e. the final result is:
3906 I1: 0 8 16 24 1 9 17 25
3907 I2: 2 10 18 26 3 11 19 27
3908 I3: 4 12 20 28 5 13 21 29
3909 I4: 6 14 22 30 7 15 23 31. */
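 /* A purely illustrative sketch (not from the sources): the reordering above
    is what is needed to vectorize an interleaved store such as

        for (i = 0; i < n; i++)
          {
            out[2 * i]     = a[i];
            out[2 * i + 1] = b[i];
          }

    where, in each vector iteration, the vectors loaded from 'a' and 'b' must
    be interleaved element-wise before the group of stores to 'out'.  */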
3911 void
3912 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3913 unsigned int length,
3914 gimple stmt,
3915 gimple_stmt_iterator *gsi,
3916 VEC(tree,heap) **result_chain)
3918 tree perm_dest, vect1, vect2, high, low;
3919 gimple perm_stmt;
3920 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3921 tree perm_mask_low, perm_mask_high;
3922 unsigned int i, n;
3923 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
3924 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3926 *result_chain = VEC_copy (tree, heap, dr_chain);
3928 for (i = 0, n = nelt / 2; i < n; i++)
3930 sel[i * 2] = i;
3931 sel[i * 2 + 1] = i + nelt;
3933 perm_mask_high = vect_gen_perm_mask (vectype, sel);
3934 gcc_assert (perm_mask_high != NULL);
3936 for (i = 0; i < nelt; i++)
3937 sel[i] += nelt / 2;
3938 perm_mask_low = vect_gen_perm_mask (vectype, sel);
3939 gcc_assert (perm_mask_low != NULL);
3941 for (i = 0, n = exact_log2 (length); i < n; i++)
3943 for (j = 0; j < length/2; j++)
3945 vect1 = VEC_index (tree, dr_chain, j);
3946 vect2 = VEC_index (tree, dr_chain, j+length/2);
3948 /* Create interleaving stmt:
3949 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
3950 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3951 DECL_GIMPLE_REG_P (perm_dest) = 1;
3952 add_referenced_var (perm_dest);
3953 high = make_ssa_name (perm_dest, NULL);
3954 perm_stmt
3955 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high,
3956 vect1, vect2, perm_mask_high);
3957 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3958 VEC_replace (tree, *result_chain, 2*j, high);
3960 /* Create interleaving stmt:
3961 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
3962 nelt*3/2+1, ...}> */
3963 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3964 DECL_GIMPLE_REG_P (perm_dest) = 1;
3965 add_referenced_var (perm_dest);
3966 low = make_ssa_name (perm_dest, NULL);
3967 perm_stmt
3968 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low,
3969 vect1, vect2, perm_mask_low);
3970 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3971 VEC_replace (tree, *result_chain, 2*j+1, low);
3973 dr_chain = VEC_copy (tree, heap, *result_chain);
3977 /* Function vect_setup_realignment
3979 This function is called when vectorizing an unaligned load using
3980 the dr_explicit_realign[_optimized] scheme.
3981 This function generates the following code at the loop prolog:
3983 p = initial_addr;
3984 x msq_init = *(floor(p)); # prolog load
3985 realignment_token = call target_builtin;
3986 loop:
3987 x msq = phi (msq_init, ---)
3989 The stmts marked with x are generated only for the case of
3990 dr_explicit_realign_optimized.
3992 The code above sets up a new (vector) pointer, pointing to the first
3993 location accessed by STMT, and a "floor-aligned" load using that pointer.
3994 It also generates code to compute the "realignment-token" (if the relevant
3995 target hook was defined), and creates a phi-node at the loop-header bb
3996 whose arguments are the result of the prolog-load (created by this
3997 function) and the result of a load that takes place in the loop (to be
3998 created by the caller to this function).
4000 For the case of dr_explicit_realign_optimized:
4001 The caller to this function uses the phi-result (msq) to create the
4002 realignment code inside the loop, and sets up the missing phi argument,
4003 as follows:
4004 loop:
4005 msq = phi (msq_init, lsq)
4006 lsq = *(floor(p')); # load in loop
4007 result = realign_load (msq, lsq, realignment_token);
4009 For the case of dr_explicit_realign:
4010 loop:
4011 msq = *(floor(p)); # load in loop
4012 p' = p + (VS-1);
4013 lsq = *(floor(p')); # load in loop
4014 result = realign_load (msq, lsq, realignment_token);
4016 Input:
4017 STMT - (scalar) load stmt to be vectorized. This load accesses
4018 a memory location that may be unaligned.
4019 GSI - place where new code is to be inserted.
4020 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4021 is used.
4023 Output:
4024 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4025 target hook, if defined.
4026 Return value - the result of the loop-header phi node. */
4028 tree
4029 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4030 tree *realignment_token,
4031 enum dr_alignment_support alignment_support_scheme,
4032 tree init_addr,
4033 struct loop **at_loop)
4035 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4036 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4037 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4038 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4039 struct loop *loop = NULL;
4040 edge pe = NULL;
4041 tree scalar_dest = gimple_assign_lhs (stmt);
4042 tree vec_dest;
4043 gimple inc;
4044 tree ptr;
4045 tree data_ref;
4046 gimple new_stmt;
4047 basic_block new_bb;
4048 tree msq_init = NULL_TREE;
4049 tree new_temp;
4050 gimple phi_stmt;
4051 tree msq = NULL_TREE;
4052 gimple_seq stmts = NULL;
4053 bool inv_p;
4054 bool compute_in_loop = false;
4055 bool nested_in_vect_loop = false;
4056 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4057 struct loop *loop_for_initial_load = NULL;
4059 if (loop_vinfo)
4061 loop = LOOP_VINFO_LOOP (loop_vinfo);
4062 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4065 gcc_assert (alignment_support_scheme == dr_explicit_realign
4066 || alignment_support_scheme == dr_explicit_realign_optimized);
4068 /* We need to generate three things:
4069 1. the misalignment computation
4070 2. the extra vector load (for the optimized realignment scheme).
4071 3. the phi node for the two vectors from which the realignment is
4072 done (for the optimized realignment scheme). */
4074 /* 1. Determine where to generate the misalignment computation.
4076 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4077 calculation will be generated by this function, outside the loop (in the
4078 preheader). Otherwise, INIT_ADDR has already been computed for us by the
4079 caller, inside the loop.
4081 Background: If the misalignment remains fixed throughout the iterations of
4082 the loop, then both realignment schemes are applicable, and also the
4083 misalignment computation can be done outside LOOP. This is because we are
4084 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4085 are a multiple of VS (the Vector Size), and therefore the misalignment in
4086 different vectorized LOOP iterations is always the same.
4087 The problem arises only if the memory access is in an inner-loop nested
4088 inside LOOP, which is now being vectorized using outer-loop vectorization.
4089 This is the only case when the misalignment of the memory access may not
4090 remain fixed throughout the iterations of the inner-loop (as explained in
4091 detail in vect_supportable_dr_alignment). In this case, not only is the
4092 optimized realignment scheme not applicable, but also the misalignment
4093 computation (and generation of the realignment token that is passed to
4094 REALIGN_LOAD) has to be done inside the loop.
4096 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4097 or not, which in turn determines if the misalignment is computed inside
4098 the inner-loop, or outside LOOP. */
4100 if (init_addr != NULL_TREE || !loop_vinfo)
4102 compute_in_loop = true;
4103 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4107 /* 2. Determine where to generate the extra vector load.
4109 For the optimized realignment scheme, instead of generating two vector
4110 loads in each iteration, we generate a single extra vector load in the
4111 preheader of the loop, and in each iteration reuse the result of the
4112 vector load from the previous iteration. In case the memory access is in
4113 an inner-loop nested inside LOOP, which is now being vectorized using
4114 outer-loop vectorization, we need to determine whether this initial vector
4115 load should be generated at the preheader of the inner-loop, or can be
4116 generated at the preheader of LOOP. If the memory access has no evolution
4117 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4118 to be generated inside LOOP (in the preheader of the inner-loop). */
4120 if (nested_in_vect_loop)
4122 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4123 bool invariant_in_outerloop =
4124 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4125 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4127 else
4128 loop_for_initial_load = loop;
4129 if (at_loop)
4130 *at_loop = loop_for_initial_load;
4132 if (loop_for_initial_load)
4133 pe = loop_preheader_edge (loop_for_initial_load);
4135 /* 3. For the case of the optimized realignment, create the first vector
4136 load at the loop preheader. */
4138 if (alignment_support_scheme == dr_explicit_realign_optimized)
4140 /* Create msq_init = *(floor(p1)) in the loop preheader */
4142 gcc_assert (!compute_in_loop);
4143 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4144 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4145 NULL_TREE, &init_addr, NULL, &inc,
4146 true, &inv_p);
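      /* Compute floor (ptr): mask off the low bits of the address so that it
         becomes vector-aligned, e.g. with 16-byte vectors this is
         ptr & -16, which clears the four least-significant bits.  */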
4147 new_stmt = gimple_build_assign_with_ops
4148 (BIT_AND_EXPR, NULL_TREE, ptr,
4149 build_int_cst (TREE_TYPE (ptr),
4150 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4151 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
4152 gimple_assign_set_lhs (new_stmt, new_temp);
4153 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4154 gcc_assert (!new_bb);
4155 data_ref
4156 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4157 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
4158 new_stmt = gimple_build_assign (vec_dest, data_ref);
4159 new_temp = make_ssa_name (vec_dest, new_stmt);
4160 gimple_assign_set_lhs (new_stmt, new_temp);
4161 mark_symbols_for_renaming (new_stmt);
4162 if (pe)
4164 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4165 gcc_assert (!new_bb);
4167 else
4168 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4170 msq_init = gimple_assign_lhs (new_stmt);
4173 /* 4. Create realignment token using a target builtin, if available.
4174 It is done either inside the containing loop, or before LOOP (as
4175 determined above). */
4177 if (targetm.vectorize.builtin_mask_for_load)
4179 tree builtin_decl;
4181 /* Compute INIT_ADDR - the initial address accessed by this memref. */
4182 if (!init_addr)
4184 /* Generate the INIT_ADDR computation outside LOOP. */
4185 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4186 NULL_TREE, loop);
4187 if (loop)
4189 pe = loop_preheader_edge (loop);
4190 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4191 gcc_assert (!new_bb);
4193 else
4194 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
4197 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4198 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4199 vec_dest =
4200 vect_create_destination_var (scalar_dest,
4201 gimple_call_return_type (new_stmt));
4202 new_temp = make_ssa_name (vec_dest, new_stmt);
4203 gimple_call_set_lhs (new_stmt, new_temp);
4205 if (compute_in_loop)
4206 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4207 else
4209 /* Generate the misalignment computation outside LOOP. */
4210 pe = loop_preheader_edge (loop);
4211 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4212 gcc_assert (!new_bb);
4215 *realignment_token = gimple_call_lhs (new_stmt);
4217 /* The result of the CALL_EXPR to this builtin is determined from
4218 the value of the parameter and no global variables are touched
4219 which makes the builtin a "const" function. Requiring the
4220 builtin to have the "const" attribute makes it unnecessary
4221 to call mark_call_clobbered. */
4222 gcc_assert (TREE_READONLY (builtin_decl));
4225 if (alignment_support_scheme == dr_explicit_realign)
4226 return msq;
4228 gcc_assert (!compute_in_loop);
4229 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4232 /* 5. Create msq = phi <msq_init, lsq> in loop */
4234 pe = loop_preheader_edge (containing_loop);
4235 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4236 msq = make_ssa_name (vec_dest, NULL);
4237 phi_stmt = create_phi_node (msq, containing_loop->header);
4238 SSA_NAME_DEF_STMT (msq) = phi_stmt;
4239 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
4241 return msq;
4245 /* Function vect_strided_load_supported.
4247 Returns TRUE if even and odd permutations are supported,
4248 and FALSE otherwise. */
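 /* For example (illustrative): with 8 elements per vector, the two masks
    checked below are
        extract even:  { 0, 2, 4, 6, 8, 10, 12, 14 }
        extract odd:   { 1, 3, 5, 7, 9, 11, 13, 15 }
    each selecting from the concatenation of the two input vectors.  */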
4250 bool
4251 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
4253 enum machine_mode mode = TYPE_MODE (vectype);
4255 /* vect_permute_load_chain requires the group size to be a power of two. */
4256 if (exact_log2 (count) == -1)
4258 if (vect_print_dump_info (REPORT_DETAILS))
4259 fprintf (vect_dump, "the size of the group of strided accesses"
4260 " is not a power of 2");
4261 return false;
4264 /* Check that the permutation is supported. */
4265 if (VECTOR_MODE_P (mode))
4267 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4268 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4270 for (i = 0; i < nelt; i++)
4271 sel[i] = i * 2;
4272 if (can_vec_perm_p (mode, false, sel))
4274 for (i = 0; i < nelt; i++)
4275 sel[i] = i * 2 + 1;
4276 if (can_vec_perm_p (mode, false, sel))
4277 return true;
4281 if (vect_print_dump_info (REPORT_DETAILS))
4282 fprintf (vect_dump, "extract even/odd not supported by target");
4283 return false;
4286 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4287 type VECTYPE. */
4289 bool
4290 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4292 return vect_lanes_optab_supported_p ("vec_load_lanes",
4293 vec_load_lanes_optab,
4294 vectype, count);
4297 /* Function vect_permute_load_chain.
4299 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4300 a power of 2, generate extract_even/odd stmts to reorder the input data
4301 correctly. Return the final references for loads in RESULT_CHAIN.
4303 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4304 The input is 4 vectors each containing 8 elements. We assign a number to each
4305 element; the input sequence is:
4307 1st vec: 0 1 2 3 4 5 6 7
4308 2nd vec: 8 9 10 11 12 13 14 15
4309 3rd vec: 16 17 18 19 20 21 22 23
4310 4th vec: 24 25 26 27 28 29 30 31
4312 The output sequence should be:
4314 1st vec: 0 4 8 12 16 20 24 28
4315 2nd vec: 1 5 9 13 17 21 25 29
4316 3rd vec: 2 6 10 14 18 22 26 30
4317 4th vec: 3 7 11 15 19 23 27 31
4319 i.e., the first output vector should contain the first elements of each
4320 interleaving group, etc.
4322 We use extract_even/odd instructions to create such output. The input of
4323 each extract_even/odd operation is two vectors
4324 1st vec 2nd vec
4325 0 1 2 3 4 5 6 7
4327 and the output is the vector of extracted even/odd elements. The output of
4328 extract_even will be: 0 2 4 6
4329 and of extract_odd: 1 3 5 7
4332 The permutation is done in log LENGTH stages. In each stage extract_even
4333 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4334 their order. In our example,
4336 E1: extract_even (1st vec, 2nd vec)
4337 E2: extract_odd (1st vec, 2nd vec)
4338 E3: extract_even (3rd vec, 4th vec)
4339 E4: extract_odd (3rd vec, 4th vec)
4341 The output for the first stage will be:
4343 E1: 0 2 4 6 8 10 12 14
4344 E2: 1 3 5 7 9 11 13 15
4345 E3: 16 18 20 22 24 26 28 30
4346 E4: 17 19 21 23 25 27 29 31
4348 In order to proceed and create the correct sequence for the next stage (or
4349 for the correct output, if the second stage is the last one, as in our
4350 example), we first put the output of extract_even operation and then the
4351 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4352 The input for the second stage is:
4354 1st vec (E1): 0 2 4 6 8 10 12 14
4355 2nd vec (E3): 16 18 20 22 24 26 28 30
4356 3rd vec (E2): 1 3 5 7 9 11 13 15
4357 4th vec (E4): 17 19 21 23 25 27 29 31
4359 The output of the second stage:
4361 E1: 0 4 8 12 16 20 24 28
4362 E2: 2 6 10 14 18 22 26 30
4363 E3: 1 5 9 13 17 21 25 29
4364 E4: 3 7 11 15 19 23 27 31
4366 And RESULT_CHAIN after reordering:
4368 1st vec (E1): 0 4 8 12 16 20 24 28
4369 2nd vec (E3): 1 5 9 13 17 21 25 29
4370 3rd vec (E2): 2 6 10 14 18 22 26 30
4371 4th vec (E4): 3 7 11 15 19 23 27 31. */
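 /* A purely illustrative sketch (not from the sources): the reordering above
    is what is needed to vectorize an interleaved load such as

        for (i = 0; i < n; i++)
          {
            a[i] = in[2 * i];
            b[i] = in[2 * i + 1];
          }

    where each pair of consecutive vectors loaded from 'in' must be split into
    its even and odd elements before the vector computation can proceed.  */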
4373 static void
4374 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4375 unsigned int length,
4376 gimple stmt,
4377 gimple_stmt_iterator *gsi,
4378 VEC(tree,heap) **result_chain)
4380 tree perm_dest, data_ref, first_vect, second_vect;
4381 tree perm_mask_even, perm_mask_odd;
4382 gimple perm_stmt;
4383 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4384 unsigned int i, j, log_length = exact_log2 (length);
4385 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4386 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4388 *result_chain = VEC_copy (tree, heap, dr_chain);
4390 for (i = 0; i < nelt; ++i)
4391 sel[i] = i * 2;
4392 perm_mask_even = vect_gen_perm_mask (vectype, sel);
4393 gcc_assert (perm_mask_even != NULL);
4395 for (i = 0; i < nelt; ++i)
4396 sel[i] = i * 2 + 1;
4397 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4398 gcc_assert (perm_mask_odd != NULL);
4400 for (i = 0; i < log_length; i++)
4402 for (j = 0; j < length; j += 2)
4404 first_vect = VEC_index (tree, dr_chain, j);
4405 second_vect = VEC_index (tree, dr_chain, j+1);
4407 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4408 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4409 DECL_GIMPLE_REG_P (perm_dest) = 1;
4410 add_referenced_var (perm_dest);
4412 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4413 first_vect, second_vect,
4414 perm_mask_even);
4416 data_ref = make_ssa_name (perm_dest, perm_stmt);
4417 gimple_assign_set_lhs (perm_stmt, data_ref);
4418 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4419 mark_symbols_for_renaming (perm_stmt);
4421 VEC_replace (tree, *result_chain, j/2, data_ref);
4423 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4424 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4425 DECL_GIMPLE_REG_P (perm_dest) = 1;
4426 add_referenced_var (perm_dest);
4428 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4429 first_vect, second_vect,
4430 perm_mask_odd);
4432 data_ref = make_ssa_name (perm_dest, perm_stmt);
4433 gimple_assign_set_lhs (perm_stmt, data_ref);
4434 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4435 mark_symbols_for_renaming (perm_stmt);
4437 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4439 dr_chain = VEC_copy (tree, heap, *result_chain);
4444 /* Function vect_transform_strided_load.
4446 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4447 to perform their permutation and ascribe the result vectorized statements to
4448 the scalar statements.
4451 void
4452 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4453 gimple_stmt_iterator *gsi)
4455 VEC(tree,heap) *result_chain = NULL;
4457 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4458 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4459 vectors, that are ready for vector computation. */
4460 result_chain = VEC_alloc (tree, heap, size);
4461 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4462 vect_record_strided_load_vectors (stmt, result_chain);
4463 VEC_free (tree, heap, result_chain);
4466 /* RESULT_CHAIN contains the output of a group of strided loads that were
4467 generated as part of the vectorization of STMT. Assign the statement
4468 for each vector to the associated scalar statement. */
4470 void
4471 vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4473 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4474 gimple next_stmt, new_stmt;
4475 unsigned int i, gap_count;
4476 tree tmp_data_ref;
4478 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4479 Since we scan the chain starting from its first node, their order
4480 corresponds to the order of data-refs in RESULT_CHAIN.
4481 next_stmt = first_stmt;
4482 gap_count = 1;
4483 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4485 if (!next_stmt)
4486 break;
4488 /* Skip the gaps. Loads created for the gaps will be removed by dead
4489 code elimination pass later. No need to check for the first stmt in
4490 the group, since it always exists.
4491 GROUP_GAP is the number of steps in elements from the previous
4492 access (if there is no gap GROUP_GAP is 1). We skip loads that
4493 correspond to the gaps. */
4494 if (next_stmt != first_stmt
4495 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4497 gap_count++;
4498 continue;
4501 while (next_stmt)
4503 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4504 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4505 copies, and we put the new vector statement in the first available
4506 RELATED_STMT. */
4507 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4508 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4509 else
4511 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4513 gimple prev_stmt =
4514 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4515 gimple rel_stmt =
4516 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4517 while (rel_stmt)
4519 prev_stmt = rel_stmt;
4520 rel_stmt =
4521 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4524 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4525 new_stmt;
4529 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4530 gap_count = 1;
4531 /* If NEXT_STMT accesses the same DR as the previous statement,
4532 put the same TMP_DATA_REF as its vectorized statement; otherwise
4533 get the next data-ref from RESULT_CHAIN. */
4534 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4535 break;
4540 /* Function vect_can_force_dr_alignment_p.
4542 Returns whether the alignment of a DECL can be forced to be at least
4543 ALIGNMENT bits. */
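 /* For example (illustrative):

        static float a[256];    // alignment may be raised, up to MAX_OFILE_ALIGNMENT
        extern float b[];       // cannot be changed: defined elsewhere

    The first is a definition local to this translation unit, so its DECL_ALIGN
    may be increased; for the second, the defining unit may rely on a smaller
    alignment.  */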
4545 bool
4546 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4548 if (TREE_CODE (decl) != VAR_DECL)
4549 return false;
4551 /* We cannot change alignment of common or external symbols as another
4552 translation unit may contain a definition with lower alignment.
4553 The rules of common symbol linking mean that the definition
4554 will override the common symbol. */
4555 if (DECL_EXTERNAL (decl)
4556 || DECL_COMMON (decl))
4557 return false;
4559 if (TREE_ASM_WRITTEN (decl))
4560 return false;
4562 if (TREE_STATIC (decl))
4563 return (alignment <= MAX_OFILE_ALIGNMENT);
4564 else
4565 return (alignment <= MAX_STACK_ALIGNMENT);
4569 /* Return whether the data reference DR is supported with respect to its
4570 alignment.
4571 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4572 if it is aligned, i.e., check if it is possible to vectorize it with different
4573 alignment. */
4575 enum dr_alignment_support
4576 vect_supportable_dr_alignment (struct data_reference *dr,
4577 bool check_aligned_accesses)
4579 gimple stmt = DR_STMT (dr);
4580 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4581 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4582 enum machine_mode mode = TYPE_MODE (vectype);
4583 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4584 struct loop *vect_loop = NULL;
4585 bool nested_in_vect_loop = false;
4587 if (aligned_access_p (dr) && !check_aligned_accesses)
4588 return dr_aligned;
4590 if (loop_vinfo)
4592 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4593 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4596 /* Possibly unaligned access. */
4598 /* We can choose between using the implicit realignment scheme (generating
4599 a misaligned_move stmt) and the explicit realignment scheme (generating
4600 aligned loads with a REALIGN_LOAD). There are two variants to the
4601 explicit realignment scheme: optimized, and unoptimized.
4602 We can optimize the realignment only if the step between consecutive
4603 vector loads is equal to the vector size. Since the vector memory
4604 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4605 is guaranteed that the misalignment amount remains the same throughout the
4606 execution of the vectorized loop. Therefore, we can create the
4607 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4608 at the loop preheader.
4610 However, in the case of outer-loop vectorization, when vectorizing a
4611 memory access in the inner-loop nested within the LOOP that is now being
4612 vectorized, while it is guaranteed that the misalignment of the
4613 vectorized memory access will remain the same in different outer-loop
4614 iterations, it is *not* guaranteed that it will remain the same throughout
4615 the execution of the inner-loop. This is because the inner-loop advances
4616 with the original scalar step (and not in steps of VS). If the inner-loop
4617 step happens to be a multiple of VS, then the misalignment remains fixed
4618 and we can use the optimized realignment scheme. For example:
4620 for (i=0; i<N; i++)
4621 for (j=0; j<M; j++)
4622 s += a[i+j];
4624 When vectorizing the i-loop in the above example, the step between
4625 consecutive vector loads is 1, and so the misalignment does not remain
4626 fixed across the execution of the inner-loop, and the realignment cannot
4627 be optimized (as illustrated in the following pseudo vectorized loop):
4629 for (i=0; i<N; i+=4)
4630 for (j=0; j<M; j++){
4631 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4632 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4633 // (assuming that we start from an aligned address).
4636 We therefore have to use the unoptimized realignment scheme:
4638 for (i=0; i<N; i+=4)
4639 for (j=k; j<M; j+=4)
4640 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4641 // that the misalignment of the initial address is
4642 // 0).
4644 The loop can then be vectorized as follows:
4646 for (k=0; k<4; k++){
4647 rt = get_realignment_token (&vp[k]);
4648 for (i=0; i<N; i+=4){
4649 v1 = vp[i+k];
4650 for (j=k; j<M; j+=4){
4651 v2 = vp[i+j+VS-1];
4652 va = REALIGN_LOAD <v1,v2,rt>;
4653 vs += va;
4654 v1 = v2;
4657 } */
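  /* Summary of the choice made below for loads: prefer the explicit
     realignment scheme when the target provides vec_realign_load (and, if it
     defines builtin_mask_for_load, that hook returns a mask decl); use the
     optimized variant unless the access is in an inner-loop whose DR_STEP
     differs from the vector size, or there is no containing loop (basic-block
     vectorization).  Otherwise fall back to the target's support for
     misaligned vector accesses.  */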
4659 if (DR_IS_READ (dr))
4661 bool is_packed = false;
4662 tree type = (TREE_TYPE (DR_REF (dr)));
4664 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4665 && (!targetm.vectorize.builtin_mask_for_load
4666 || targetm.vectorize.builtin_mask_for_load ()))
4668 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4669 if ((nested_in_vect_loop
4670 && (TREE_INT_CST_LOW (DR_STEP (dr))
4671 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4672 || !loop_vinfo)
4673 return dr_explicit_realign;
4674 else
4675 return dr_explicit_realign_optimized;
4677 if (!known_alignment_for_access_p (dr))
4678 is_packed = contains_packed_reference (DR_REF (dr));
4680 if (targetm.vectorize.
4681 support_vector_misalignment (mode, type,
4682 DR_MISALIGNMENT (dr), is_packed))
4683 /* Can't software pipeline the loads, but can at least do them. */
4684 return dr_unaligned_supported;
4686 else
4688 bool is_packed = false;
4689 tree type = (TREE_TYPE (DR_REF (dr)));
4691 if (!known_alignment_for_access_p (dr))
4692 is_packed = contains_packed_reference (DR_REF (dr));
4694 if (targetm.vectorize.
4695 support_vector_misalignment (mode, type,
4696 DR_MISALIGNMENT (dr), is_packed))
4697 return dr_unaligned_supported;
4700 /* Unsupported. */
4701 return dr_unaligned_unsupported;