gcc/tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
43 #include "expr.h"
44 #include "optabs.h"
46 /* Return true if load- or store-lanes optab OPTAB is implemented for
47 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
49 static bool
50 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
51 tree vectype, unsigned HOST_WIDE_INT count)
53 enum machine_mode mode, array_mode;
54 bool limit_p;
56 mode = TYPE_MODE (vectype);
57 limit_p = !targetm.array_mode_supported_p (mode, count);
58 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
59 MODE_INT, limit_p);
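/* For example (illustrative): for a 128-bit vector mode and COUNT == 2 this
looks for a 256-bit integer mode to hold the whole group of vectors; if no
such mode is available, BLKmode is returned and the optab cannot be used. */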
61 if (array_mode == BLKmode)
63 if (vect_print_dump_info (REPORT_DETAILS))
64 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65 GET_MODE_NAME (mode), count);
66 return false;
69 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
71 if (vect_print_dump_info (REPORT_DETAILS))
72 fprintf (vect_dump, "cannot use %s<%s><%s>",
73 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
74 return false;
77 if (vect_print_dump_info (REPORT_DETAILS))
78 fprintf (vect_dump, "can use %s<%s><%s>",
79 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
81 return true;
85 /* Return the smallest scalar part of STMT.
86 This is used to determine the vectype of the stmt. We generally set the
87 vectype according to the type of the result (lhs). For stmts whose
88 result-type is different than the type of the arguments (e.g., demotion,
89 promotion), vectype will be reset appropriately (later). Note that we have
90 to visit the smallest datatype in this function, because that determines the
91 VF. If the smallest datatype in the loop is present only as the rhs of a
92 promotion operation - we'd miss it.
93 Such a case, where a variable of this datatype does not appear in the lhs
94 anywhere in the loop, can only occur if it's an invariant: e.g.:
95 'int_x = (int) short_inv', which we'd expect to have been optimized away by
96 invariant motion. However, we cannot rely on invariant motion to always
97 take invariants out of the loop, and so in the case of promotion we also
98 have to check the rhs.
99 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
100 types. */
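/* For example (illustrative): for 'int_x = (int) short_y' the rhs type is
2 bytes wide and the lhs type 4 bytes wide, so the short type is returned
with *RHS_SIZE_UNIT set to 2 and *LHS_SIZE_UNIT set to 4. */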
102 tree
103 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
104 HOST_WIDE_INT *rhs_size_unit)
106 tree scalar_type = gimple_expr_type (stmt);
107 HOST_WIDE_INT lhs, rhs;
109 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
111 if (is_gimple_assign (stmt)
112 && (gimple_assign_cast_p (stmt)
113 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
114 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
116 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
118 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
119 if (rhs < lhs)
120 scalar_type = rhs_type;
123 *lhs_size_unit = lhs;
124 *rhs_size_unit = rhs;
125 return scalar_type;
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
135 gimple next_stmt = first_stmt;
136 int result = 0;
138 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
139 return -1;
141 while (next_stmt && next_stmt != stmt)
143 result++;
144 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
147 if (next_stmt)
148 return result;
149 else
150 return -1;
154 /* Function vect_insert_into_interleaving_chain.
156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
158 static void
159 vect_insert_into_interleaving_chain (struct data_reference *dra,
160 struct data_reference *drb)
162 gimple prev, next;
163 tree next_init;
164 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
167 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
169 while (next)
171 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
174 /* Insert here. */
175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
177 return;
179 prev = next;
180 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
183 /* We got to the end of the list. Insert here. */
184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
189 /* Function vect_update_interleaving_chain.
191 For two data-refs DRA and DRB that are a part of a chain of interleaved data
192 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
194 There are four possible cases:
195 1. New stmts - both DRA and DRB are not a part of any chain:
196 FIRST_DR = DRB
197 NEXT_DR (DRB) = DRA
198 2. DRB is a part of a chain and DRA is not:
199 no need to update FIRST_DR
200 no need to insert DRB
201 insert DRA according to init
202 3. DRA is a part of a chain and DRB is not:
203 if (init of FIRST_DR > init of DRB)
204 FIRST_DR = DRB
205 NEXT(FIRST_DR) = previous FIRST_DR
206 else
207 insert DRB according to its init
208 4. both DRA and DRB are in some interleaving chains:
209 choose the chain with the smallest init of FIRST_DR
210 insert the nodes of the second chain into the first one. */
212 static void
213 vect_update_interleaving_chain (struct data_reference *drb,
214 struct data_reference *dra)
216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218 tree next_init, init_dra_chain, init_drb_chain;
219 gimple first_a, first_b;
220 tree node_init;
221 gimple node, prev, next, first_stmt;
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
229 return;
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra, drb);
238 return;
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
246 old_first_stmt)));
247 gimple tmp;
249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp = old_first_stmt;
259 while (tmp)
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
265 else
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb, dra);
269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
271 return;
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277 if (first_a == first_b)
278 return;
279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
284 /* Insert the nodes of DRA chain into the DRB chain.
285 After inserting a node, continue from this node of the DRB chain (don't
286 start from the beginning). */
287 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289 first_stmt = first_b;
291 else
293 /* Insert the nodes of DRB chain into the DRA chain.
294 After inserting a node, continue from this node of the DRA chain (don't
295 start from the beginning). */
296 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298 first_stmt = first_a;
301 while (node)
303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
305 while (next)
307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 if (tree_int_cst_compare (next_init, node_init) > 0)
310 /* Insert here. */
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
313 prev = node;
314 break;
316 prev = next;
317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
319 if (!next)
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
324 prev = node;
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
331 /* Check dependence between DRA and DRB for basic block vectorization.
332 If the accesses share the same bases and offsets, we can compare their initial
333 constant offsets to decide whether they differ or not. In case of a read-
334 write dependence we check that the load is before the store to ensure that
335 vectorization will not change the order of the accesses. */
337 static bool
338 vect_drs_dependent_in_basic_block (struct data_reference *dra,
339 struct data_reference *drb)
341 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
342 gimple earlier_stmt;
344 /* We only call this function for pairs of loads and stores, but we verify
345 it here. */
346 if (DR_IS_READ (dra) == DR_IS_READ (drb))
348 if (DR_IS_READ (dra))
349 return false;
350 else
351 return true;
354 /* Check that the data-refs have same bases and offsets. If not, we can't
355 determine if they are dependent. */
356 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357 || !dr_equal_offsets_p (dra, drb))
358 return true;
360 /* Check the types. */
361 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
364 if (type_size_a != type_size_b
365 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366 TREE_TYPE (DR_REF (drb))))
367 return true;
369 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
372 /* Two different locations - no dependence. */
373 if (init_a != init_b)
374 return false;
376 /* We have a read-write dependence. Check that the load is before the store.
377 When we vectorize basic blocks, vector load can be only before
378 corresponding scalar load, and vector store can be only after its
379 corresponding scalar store. So the order of the accesses is preserved in
380 case the load is before the store. */
381 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
383 return false;
385 return true;
389 /* Function vect_check_interleaving.
391 Check if DRA and DRB are a part of interleaving. In case they are, insert
392 DRA and DRB in an interleaving chain. */
394 static bool
395 vect_check_interleaving (struct data_reference *dra,
396 struct data_reference *drb)
398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
400 /* Check that the data-refs have the same first location (except init) and
401 that they are both either loads or stores (not a load and a store). */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403 || !dr_equal_offsets_p (dra, drb)
404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405 || DR_IS_READ (dra) != DR_IS_READ (drb))
406 return false;
408 /* Check:
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between
412 data-refs' inits. */
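/* For example (illustrative): for the accesses a[2*i] and a[2*i+1] on a
4-byte type we have equal bases and offsets, step 8, and inits 0 and 4.
The init difference 4 is a multiple of the type size and does not exceed
the step, so the two refs form an interleaving group. */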
413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
416 if (type_size_a != type_size_b
417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419 TREE_TYPE (DR_REF (drb))))
420 return false;
422 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424 step = TREE_INT_CST_LOW (DR_STEP (dra));
426 if (init_a > init_b)
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size = (init_a - init_b) % type_size_a;
432 if (step && (init_a - init_b) > step)
433 return false;
435 if (diff_mod_size == 0)
437 vect_update_interleaving_chain (drb, dra);
438 if (vect_print_dump_info (REPORT_DR_DETAILS))
440 fprintf (vect_dump, "Detected interleaving ");
441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 fprintf (vect_dump, " and ");
443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
445 return true;
448 else
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size = (init_b - init_a) % type_size_a;
454 if (step && (init_b - init_a) > step)
455 return false;
457 if (diff_mod_size == 0)
459 vect_update_interleaving_chain (dra, drb);
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 fprintf (vect_dump, "Detected interleaving ");
463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
467 return true;
471 return false;
474 /* Check if the data references pointed to by DR_I and DR_J are the same or
475 belong to the same interleaving group. Return FALSE if the drs are
476 different, otherwise return TRUE. */
478 static bool
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
481 gimple stmt_i = DR_STMT (dr_i);
482 gimple stmt_j = DR_STMT (dr_j);
484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
489 return true;
490 else
491 return false;
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
497 static bool
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
504 return true;
505 else
506 return false;
509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
510 tested at run-time. Return TRUE if DDR was successfully inserted.
511 Return false if versioning is not supported. */
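/* The ddrs collected here are later used to emit a run-time guard before the
vectorized loop. Conceptually the guard checks that the two accessed
segments do not overlap, roughly:
if (a_end <= b_start || b_end <= a_start)
... vectorized loop ...
else
... scalar loop ...
where a_* and b_* are illustrative names for the segment bounds of the two
references. */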
513 static bool
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
519 return false;
521 if (vect_print_dump_info (REPORT_DR_DETAILS))
523 fprintf (vect_dump, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525 fprintf (vect_dump, " and ");
526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
529 if (optimize_loop_nest_for_size_p (loop))
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning not supported when optimizing for size.");
533 return false;
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
537 if (loop->inner)
539 if (vect_print_dump_info (REPORT_DR_DETAILS))
540 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
541 return false;
544 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
545 return true;
549 /* Function vect_analyze_data_ref_dependence.
551 Return TRUE if there (might) exist a dependence between a memory-reference
552 DRA and a memory-reference DRB. When versioning for alias can check the
553 dependence at run-time, return FALSE. Adjust *MAX_VF according to
554 the data dependence. */
556 static bool
557 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
558 loop_vec_info loop_vinfo, int *max_vf,
559 bool *data_dependence_in_bb)
561 unsigned int i;
562 struct loop *loop = NULL;
563 struct data_reference *dra = DDR_A (ddr);
564 struct data_reference *drb = DDR_B (ddr);
565 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
566 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
567 lambda_vector dist_v;
568 unsigned int loop_depth;
570 /* Don't bother to analyze statements marked as unvectorizable. */
571 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
572 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
573 return false;
575 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
577 /* Independent data accesses. */
578 vect_check_interleaving (dra, drb);
579 return false;
582 if (loop_vinfo)
583 loop = LOOP_VINFO_LOOP (loop_vinfo);
585 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
586 return false;
588 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
590 if (loop_vinfo)
592 if (vect_print_dump_info (REPORT_DR_DETAILS))
594 fprintf (vect_dump, "versioning for alias required: "
595 "can't determine dependence between ");
596 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
597 fprintf (vect_dump, " and ");
598 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
601 /* Add to list of ddrs that need to be tested at run-time. */
602 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
605 /* When vectorizing a basic block an unknown dependence can still mean a
606 strided access. */
607 if (vect_check_interleaving (dra, drb))
608 return false;
610 if (vect_print_dump_info (REPORT_DR_DETAILS))
612 fprintf (vect_dump, "can't determine dependence between ");
613 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
614 fprintf (vect_dump, " and ");
615 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
618 /* We do not vectorize basic blocks with write-write dependencies. */
619 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
620 return true;
622 /* We deal with read-write dependencies in basic blocks later (by
623 verifying that all the loads in the basic block are before all the
624 stores). */
625 *data_dependence_in_bb = true;
626 return false;
629 /* Versioning for alias is not yet supported for basic block SLP, and
630 dependence distance is not applicable, hence, in case of known data
631 dependence, basic block vectorization is impossible for now. */
632 if (!loop_vinfo)
634 if (dra != drb && vect_check_interleaving (dra, drb))
635 return false;
637 if (vect_print_dump_info (REPORT_DR_DETAILS))
639 fprintf (vect_dump, "determined dependence between ");
640 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
641 fprintf (vect_dump, " and ");
642 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
645 /* Do not vectorize basic blocks with write-write dependences. */
646 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
647 return true;
649 /* Check if this dependence is allowed in basic block vectorization. */
650 return vect_drs_dependent_in_basic_block (dra, drb);
653 /* Loop-based vectorization and known data dependence. */
654 if (DDR_NUM_DIST_VECTS (ddr) == 0)
656 if (vect_print_dump_info (REPORT_DR_DETAILS))
658 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
659 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
660 fprintf (vect_dump, " and ");
661 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
663 /* Add to list of ddrs that need to be tested at run-time. */
664 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
667 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
668 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
670 int dist = dist_v[loop_depth];
672 if (vect_print_dump_info (REPORT_DR_DETAILS))
673 fprintf (vect_dump, "dependence distance = %d.", dist);
675 if (dist == 0)
677 if (vect_print_dump_info (REPORT_DR_DETAILS))
679 fprintf (vect_dump, "dependence distance == 0 between ");
680 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
681 fprintf (vect_dump, " and ");
682 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
685 /* For interleaving, mark that there is a read-write dependency if
686 necessary. We have already checked that one of the data-refs is a store. */
687 if (DR_IS_READ (dra))
688 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
689 else
691 if (DR_IS_READ (drb))
692 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
695 continue;
698 if (dist > 0 && DDR_REVERSED_P (ddr))
700 /* If DDR_REVERSED_P the order of the data-refs in DDR was
701 reversed (to make distance vector positive), and the actual
702 distance is negative. */
703 if (vect_print_dump_info (REPORT_DR_DETAILS))
704 fprintf (vect_dump, "dependence distance negative.");
705 continue;
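/* For example (illustrative): in
for (i = 0; i < N; i++)
a[i + 3] = a[i] + 1;
the dependence distance is 3. With a vectorization factor of at most 3 the
dependence is still respected, so *max_vf is capped at abs (dist) below;
once abs (dist) >= *max_vf the distance imposes no further constraint. */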
708 if (abs (dist) >= 2
709 && abs (dist) < *max_vf)
711 /* The dependence distance requires reduction of the maximal
712 vectorization factor. */
713 *max_vf = abs (dist);
714 if (vect_print_dump_info (REPORT_DR_DETAILS))
715 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
716 *max_vf);
719 if (abs (dist) >= *max_vf)
721 /* Dependence distance does not create dependence, as far as
722 vectorization is concerned, in this case. */
723 if (vect_print_dump_info (REPORT_DR_DETAILS))
724 fprintf (vect_dump, "dependence distance >= VF.");
725 continue;
728 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
730 fprintf (vect_dump, "not vectorized, possible dependence "
731 "between data-refs ");
732 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
733 fprintf (vect_dump, " and ");
734 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
737 return true;
740 return false;
743 /* Function vect_analyze_data_ref_dependences.
745 Examine all the data references in the loop, and make sure there do not
746 exist any data dependences between them. Set *MAX_VF according to
747 the maximum vectorization factor the data dependences allow. */
749 bool
750 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
751 bb_vec_info bb_vinfo, int *max_vf,
752 bool *data_dependence_in_bb)
754 unsigned int i;
755 VEC (ddr_p, heap) *ddrs = NULL;
756 struct data_dependence_relation *ddr;
758 if (vect_print_dump_info (REPORT_DETAILS))
759 fprintf (vect_dump, "=== vect_analyze_dependences ===");
761 if (loop_vinfo)
762 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
763 else
764 ddrs = BB_VINFO_DDRS (bb_vinfo);
766 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
767 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
768 data_dependence_in_bb))
769 return false;
771 return true;
775 /* Function vect_compute_data_ref_alignment
777 Compute the misalignment of the data reference DR.
779 Output:
780 1. If during the misalignment computation it is found that the data reference
781 cannot be vectorized then false is returned.
782 2. DR_MISALIGNMENT (DR) is defined.
784 FOR NOW: No analysis is actually performed. Misalignment is calculated
785 only for trivial cases. TODO. */
787 static bool
788 vect_compute_data_ref_alignment (struct data_reference *dr)
790 gimple stmt = DR_STMT (dr);
791 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
792 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
793 struct loop *loop = NULL;
794 tree ref = DR_REF (dr);
795 tree vectype;
796 tree base, base_addr;
797 bool base_aligned;
798 tree misalign;
799 tree aligned_to, alignment;
801 if (vect_print_dump_info (REPORT_DETAILS))
802 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
804 if (loop_vinfo)
805 loop = LOOP_VINFO_LOOP (loop_vinfo);
807 /* Initialize misalignment to unknown. */
808 SET_DR_MISALIGNMENT (dr, -1);
810 misalign = DR_INIT (dr);
811 aligned_to = DR_ALIGNED_TO (dr);
812 base_addr = DR_BASE_ADDRESS (dr);
813 vectype = STMT_VINFO_VECTYPE (stmt_info);
815 /* In case the dataref is in an inner-loop of the loop that is being
816 vectorized (LOOP), we use the base and misalignment information
817 relative to the outer-loop (LOOP). This is ok only if the misalignment
818 stays the same throughout the execution of the inner-loop, which is why
819 we have to check that the stride of the dataref in the inner-loop is a
820 multiple of the vector size. */
821 if (loop && nested_in_vect_loop_p (loop, stmt))
823 tree step = DR_STEP (dr);
824 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
826 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
828 if (vect_print_dump_info (REPORT_ALIGNMENT))
829 fprintf (vect_dump, "inner step divides the vector-size.");
830 misalign = STMT_VINFO_DR_INIT (stmt_info);
831 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
832 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
834 else
836 if (vect_print_dump_info (REPORT_ALIGNMENT))
837 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
838 misalign = NULL_TREE;
842 base = build_fold_indirect_ref (base_addr);
843 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
845 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
846 || !misalign)
848 if (vect_print_dump_info (REPORT_ALIGNMENT))
850 fprintf (vect_dump, "Unknown alignment for access: ");
851 print_generic_expr (vect_dump, base, TDF_SLIM);
853 return true;
856 if ((DECL_P (base)
857 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
858 alignment) >= 0)
859 || (TREE_CODE (base_addr) == SSA_NAME
860 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
861 TREE_TYPE (base_addr)))),
862 alignment) >= 0)
863 || (get_pointer_alignment (base_addr, TYPE_ALIGN (vectype))
864 >= TYPE_ALIGN (vectype)))
865 base_aligned = true;
866 else
867 base_aligned = false;
869 if (!base_aligned)
871 /* Do not change the alignment of global variables if
872 flag_section_anchors is enabled. */
873 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
874 || (TREE_STATIC (base) && flag_section_anchors))
876 if (vect_print_dump_info (REPORT_DETAILS))
878 fprintf (vect_dump, "can't force alignment of ref: ");
879 print_generic_expr (vect_dump, ref, TDF_SLIM);
881 return true;
884 /* Force the alignment of the decl.
885 NOTE: This is the only change to the code we make during
886 the analysis phase, before deciding to vectorize the loop. */
887 if (vect_print_dump_info (REPORT_DETAILS))
889 fprintf (vect_dump, "force alignment of ");
890 print_generic_expr (vect_dump, ref, TDF_SLIM);
893 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
894 DECL_USER_ALIGN (base) = 1;
897 /* At this point we assume that the base is aligned. */
898 gcc_assert (base_aligned
899 || (TREE_CODE (base) == VAR_DECL
900 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
902 /* If this is a backward running DR then first access in the larger
903 vectype actually is N-1 elements before the address in the DR.
904 Adjust misalign accordingly. */
905 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
907 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
908 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
909 otherwise we wouldn't be here. */
910 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
911 /* PLUS because DR_STEP was negative. */
912 misalign = size_binop (PLUS_EXPR, misalign, offset);
915 /* Modulo alignment. */
916 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
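/* For example (illustrative): with a 16-byte vector type and a constant
offset (DR_INIT) of 20 bytes from an aligned base, the modulo above yields
a misalignment of 4 bytes. */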
918 if (!host_integerp (misalign, 1))
920 /* Negative or overflowed misalignment value. */
921 if (vect_print_dump_info (REPORT_DETAILS))
922 fprintf (vect_dump, "unexpected misalign value");
923 return false;
926 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
928 if (vect_print_dump_info (REPORT_DETAILS))
930 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
931 print_generic_expr (vect_dump, ref, TDF_SLIM);
934 return true;
938 /* Function vect_compute_data_refs_alignment
940 Compute the misalignment of data references in the loop.
941 Return FALSE if a data reference is found that cannot be vectorized. */
943 static bool
944 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
945 bb_vec_info bb_vinfo)
947 VEC (data_reference_p, heap) *datarefs;
948 struct data_reference *dr;
949 unsigned int i;
951 if (loop_vinfo)
952 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
953 else
954 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
956 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
957 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
958 && !vect_compute_data_ref_alignment (dr))
960 if (bb_vinfo)
962 /* Mark unsupported statement as unvectorizable. */
963 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
964 continue;
966 else
967 return false;
970 return true;
974 /* Function vect_update_misalignment_for_peel
976 DR - the data reference whose misalignment is to be adjusted.
977 DR_PEEL - the data reference whose misalignment is being made
978 zero in the vector loop by the peel.
979 NPEEL - the number of iterations in the peel loop if the misalignment
980 of DR_PEEL is known at compile time. */
982 static void
983 vect_update_misalignment_for_peel (struct data_reference *dr,
984 struct data_reference *dr_peel, int npeel)
986 unsigned int i;
987 VEC(dr_p,heap) *same_align_drs;
988 struct data_reference *current_dr;
989 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
990 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
991 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
992 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
994 /* For interleaved data accesses the step in the loop must be multiplied by
995 the size of the interleaving group. */
996 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
997 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
998 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
999 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1001 /* It can be assumed that the data refs with the same alignment as dr_peel
1002 are aligned in the vector loop. */
1003 same_align_drs
1004 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1005 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1007 if (current_dr != dr)
1008 continue;
1009 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1010 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1011 SET_DR_MISALIGNMENT (dr, 0);
1012 return;
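/* For example (illustrative): if DR and DR_PEEL have 4-byte elements, DR's
known misalignment is 4 bytes and NPEEL is 3, the code below computes
4 + 3*4 = 16, which masked by the 16-byte vector size gives a new
misalignment of 0. */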
1015 if (known_alignment_for_access_p (dr)
1016 && known_alignment_for_access_p (dr_peel))
1018 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1019 int misal = DR_MISALIGNMENT (dr);
1020 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1021 misal += negative ? -npeel * dr_size : npeel * dr_size;
1022 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1023 SET_DR_MISALIGNMENT (dr, misal);
1024 return;
1027 if (vect_print_dump_info (REPORT_DETAILS))
1028 fprintf (vect_dump, "Setting misalignment to -1.");
1029 SET_DR_MISALIGNMENT (dr, -1);
1033 /* Function vect_verify_datarefs_alignment
1035 Return TRUE if all data references in the loop can be
1036 handled with respect to alignment. */
1038 bool
1039 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1041 VEC (data_reference_p, heap) *datarefs;
1042 struct data_reference *dr;
1043 enum dr_alignment_support supportable_dr_alignment;
1044 unsigned int i;
1046 if (loop_vinfo)
1047 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1048 else
1049 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1051 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1053 gimple stmt = DR_STMT (dr);
1054 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1056 /* For interleaving, only the alignment of the first access matters.
1057 Skip statements marked as not vectorizable. */
1058 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1059 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1060 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1061 continue;
1063 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1064 if (!supportable_dr_alignment)
1066 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1068 if (DR_IS_READ (dr))
1069 fprintf (vect_dump,
1070 "not vectorized: unsupported unaligned load.");
1071 else
1072 fprintf (vect_dump,
1073 "not vectorized: unsupported unaligned store.");
1075 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1077 return false;
1079 if (supportable_dr_alignment != dr_aligned
1080 && vect_print_dump_info (REPORT_ALIGNMENT))
1081 fprintf (vect_dump, "Vectorizing an unaligned access.");
1083 return true;
1087 /* Function vector_alignment_reachable_p
1089 Return true if vector alignment for DR is reachable by peeling
1090 a few loop iterations. Return false otherwise. */
1092 static bool
1093 vector_alignment_reachable_p (struct data_reference *dr)
1095 gimple stmt = DR_STMT (dr);
1096 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1097 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1099 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1101 /* For interleaved accesses we peel only if the number of iterations in
1102 the prolog loop (VF - misalignment) is a multiple of the
1103 number of interleaved accesses (the group size). */
1104 int elem_size, mis_in_elements;
1105 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1107 /* FORNOW: handle only known alignment. */
1108 if (!known_alignment_for_access_p (dr))
1109 return false;
1111 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1112 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1114 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1115 return false;
1118 /* If the misalignment is known at compile time then allow peeling
1119 only if natural alignment is reachable through peeling. */
1120 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1122 HOST_WIDE_INT elmsize =
1123 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1124 if (vect_print_dump_info (REPORT_DETAILS))
1126 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1127 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1129 if (DR_MISALIGNMENT (dr) % elmsize)
1131 if (vect_print_dump_info (REPORT_DETAILS))
1132 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1133 return false;
1137 if (!known_alignment_for_access_p (dr))
1139 tree type = (TREE_TYPE (DR_REF (dr)));
1140 tree ba = DR_BASE_OBJECT (dr);
1141 bool is_packed = false;
1143 if (ba)
1144 is_packed = contains_packed_reference (ba);
1146 if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1147 is_packed = true;
1149 if (vect_print_dump_info (REPORT_DETAILS))
1150 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1151 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1152 return true;
1153 else
1154 return false;
1157 return true;
1161 /* Calculate the cost of the memory access represented by DR. */
1163 static void
1164 vect_get_data_access_cost (struct data_reference *dr,
1165 unsigned int *inside_cost,
1166 unsigned int *outside_cost)
1168 gimple stmt = DR_STMT (dr);
1169 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1170 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1171 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1172 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1173 int ncopies = vf / nunits;
1174 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1176 if (!supportable_dr_alignment)
1177 *inside_cost = VECT_MAX_COST;
1178 else
1180 if (DR_IS_READ (dr))
1181 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1182 else
1183 vect_get_store_cost (dr, ncopies, inside_cost);
1186 if (vect_print_dump_info (REPORT_COST))
1187 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1188 "outside_cost = %d.", *inside_cost, *outside_cost);
1192 static hashval_t
1193 vect_peeling_hash (const void *elem)
1195 const struct _vect_peel_info *peel_info;
1197 peel_info = (const struct _vect_peel_info *) elem;
1198 return (hashval_t) peel_info->npeel;
1202 static int
1203 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1205 const struct _vect_peel_info *a, *b;
1207 a = (const struct _vect_peel_info *) elem1;
1208 b = (const struct _vect_peel_info *) elem2;
1209 return (a->npeel == b->npeel);
1213 /* Insert DR into peeling hash table with NPEEL as key. */
1215 static void
1216 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1217 int npeel)
1219 struct _vect_peel_info elem, *slot;
1220 void **new_slot;
1221 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1223 elem.npeel = npeel;
1224 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1225 &elem);
1226 if (slot)
1227 slot->count++;
1228 else
1230 slot = XNEW (struct _vect_peel_info);
1231 slot->npeel = npeel;
1232 slot->dr = dr;
1233 slot->count = 1;
1234 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1235 INSERT);
1236 *new_slot = slot;
1239 if (!supportable_dr_alignment && !flag_vect_cost_model)
1240 slot->count += VECT_MAX_COST;
1244 /* Traverse the peeling hash table to find the peeling option that aligns the
1245 maximum number of data accesses. */
1247 static int
1248 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1250 vect_peel_info elem = (vect_peel_info) *slot;
1251 vect_peel_extended_info max = (vect_peel_extended_info) data;
1253 if (elem->count > max->peel_info.count
1254 || (elem->count == max->peel_info.count
1255 && max->peel_info.npeel > elem->npeel))
1257 max->peel_info.npeel = elem->npeel;
1258 max->peel_info.count = elem->count;
1259 max->peel_info.dr = elem->dr;
1262 return 1;
1266 /* Traverse peeling hash table and calculate cost for each peeling option.
1267 Find the one with the lowest cost. */
1269 static int
1270 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1272 vect_peel_info elem = (vect_peel_info) *slot;
1273 vect_peel_extended_info min = (vect_peel_extended_info) data;
1274 int save_misalignment, dummy;
1275 unsigned int inside_cost = 0, outside_cost = 0, i;
1276 gimple stmt = DR_STMT (elem->dr);
1277 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1278 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1279 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1280 struct data_reference *dr;
1282 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1284 stmt = DR_STMT (dr);
1285 stmt_info = vinfo_for_stmt (stmt);
1286 /* For interleaving, only the alignment of the first access
1287 matters. */
1288 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1289 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1290 continue;
1292 save_misalignment = DR_MISALIGNMENT (dr);
1293 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1294 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1295 SET_DR_MISALIGNMENT (dr, save_misalignment);
1298 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1299 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1301 if (inside_cost < min->inside_cost
1302 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1304 min->inside_cost = inside_cost;
1305 min->outside_cost = outside_cost;
1306 min->peel_info.dr = elem->dr;
1307 min->peel_info.npeel = elem->npeel;
1310 return 1;
1314 /* Choose best peeling option by traversing peeling hash table and either
1315 choosing an option with the lowest cost (if cost model is enabled) or the
1316 option that aligns as many accesses as possible. */
1318 static struct data_reference *
1319 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1320 unsigned int *npeel)
1322 struct _vect_peel_extended_info res;
1324 res.peel_info.dr = NULL;
1326 if (flag_vect_cost_model)
1328 res.inside_cost = INT_MAX;
1329 res.outside_cost = INT_MAX;
1330 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1331 vect_peeling_hash_get_lowest_cost, &res);
1333 else
1335 res.peel_info.count = 0;
1336 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1337 vect_peeling_hash_get_most_frequent, &res);
1340 *npeel = res.peel_info.npeel;
1341 return res.peel_info.dr;
1345 /* Function vect_enhance_data_refs_alignment
1347 This pass will use loop versioning and loop peeling in order to enhance
1348 the alignment of data references in the loop.
1350 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1351 original loop is to be vectorized. Any other loops that are created by
1352 the transformations performed in this pass - are not supposed to be
1353 vectorized. This restriction will be relaxed.
1355 This pass will require a cost model to guide it whether to apply peeling
1356 or versioning or a combination of the two. For example, the scheme that
1357 Intel uses when given a loop with several memory accesses is as follows:
1358 choose one memory access ('p') whose alignment you want to force by doing
1359 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1360 other accesses are not necessarily aligned, or (2) use loop versioning to
1361 generate one loop in which all accesses are aligned, and another loop in
1362 which only 'p' is necessarily aligned.
1364 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1365 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1366 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1368 Devising a cost model is the most critical aspect of this work. It will
1369 guide us on which access to peel for, whether to use loop versioning, how
1370 many versions to create, etc. The cost model will probably consist of
1371 generic considerations as well as target specific considerations (on
1372 powerpc for example, misaligned stores are more painful than misaligned
1373 loads).
1375 Here are the general steps involved in alignment enhancements:
1377 -- original loop, before alignment analysis:
1378 for (i=0; i<N; i++){
1379 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1380 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1383 -- After vect_compute_data_refs_alignment:
1384 for (i=0; i<N; i++){
1385 x = q[i]; # DR_MISALIGNMENT(q) = 3
1386 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1389 -- Possibility 1: we do loop versioning:
1390 if (p is aligned) {
1391 for (i=0; i<N; i++){ # loop 1A
1392 x = q[i]; # DR_MISALIGNMENT(q) = 3
1393 p[i] = y; # DR_MISALIGNMENT(p) = 0
1396 else {
1397 for (i=0; i<N; i++){ # loop 1B
1398 x = q[i]; # DR_MISALIGNMENT(q) = 3
1399 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1403 -- Possibility 2: we do loop peeling:
1404 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1405 x = q[i];
1406 p[i] = y;
1408 for (i = 3; i < N; i++){ # loop 2A
1409 x = q[i]; # DR_MISALIGNMENT(q) = 0
1410 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1413 -- Possibility 3: combination of loop peeling and versioning:
1414 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1415 x = q[i];
1416 p[i] = y;
1418 if (p is aligned) {
1419 for (i = 3; i<N; i++){ # loop 3A
1420 x = q[i]; # DR_MISALIGNMENT(q) = 0
1421 p[i] = y; # DR_MISALIGNMENT(p) = 0
1424 else {
1425 for (i = 3; i<N; i++){ # loop 3B
1426 x = q[i]; # DR_MISALIGNMENT(q) = 0
1427 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1431 These loops are later passed to loop_transform to be vectorized. The
1432 vectorizer will use the alignment information to guide the transformation
1433 (whether to generate regular loads/stores, or with special handling for
1434 misalignment). */
1436 bool
1437 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1439 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1440 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1441 enum dr_alignment_support supportable_dr_alignment;
1442 struct data_reference *dr0 = NULL, *first_store = NULL;
1443 struct data_reference *dr;
1444 unsigned int i, j;
1445 bool do_peeling = false;
1446 bool do_versioning = false;
1447 bool stat;
1448 gimple stmt;
1449 stmt_vec_info stmt_info;
1450 int vect_versioning_for_alias_required;
1451 unsigned int npeel = 0;
1452 bool all_misalignments_unknown = true;
1453 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1454 unsigned possible_npeel_number = 1;
1455 tree vectype;
1456 unsigned int nelements, mis, same_align_drs_max = 0;
1458 if (vect_print_dump_info (REPORT_DETAILS))
1459 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1461 /* While cost model enhancements are expected in the future, the high level
1462 view of the code at this time is as follows:
1464 A) If there is a misaligned access then see if peeling to align
1465 this access can make all data references satisfy
1466 vect_supportable_dr_alignment. If so, update data structures
1467 as needed and return true.
1469 B) If peeling wasn't possible and there is a data reference with an
1470 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1471 then see if loop versioning checks can be used to make all data
1472 references satisfy vect_supportable_dr_alignment. If so, update
1473 data structures as needed and return true.
1475 C) If neither peeling nor versioning were successful then return false if
1476 any data reference does not satisfy vect_supportable_dr_alignment.
1478 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1480 Note, Possibility 3 above (which is peeling and versioning together) is not
1481 being done at this time. */
1483 /* (1) Peeling to force alignment. */
1485 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1486 Considerations:
1487 + How many accesses will become aligned due to the peeling
1488 - How many accesses will become unaligned due to the peeling,
1489 and the cost of misaligned accesses.
1490 - The cost of peeling (the extra runtime checks, the increase
1491 in code size). */
1493 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1495 stmt = DR_STMT (dr);
1496 stmt_info = vinfo_for_stmt (stmt);
1498 if (!STMT_VINFO_RELEVANT (stmt_info))
1499 continue;
1501 /* For interleaving, only the alignment of the first access
1502 matters. */
1503 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1504 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1505 continue;
1507 /* For invariant accesses there is nothing to enhance. */
1508 if (integer_zerop (DR_STEP (dr)))
1509 continue;
1511 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1512 do_peeling = vector_alignment_reachable_p (dr);
1513 if (do_peeling)
1515 if (known_alignment_for_access_p (dr))
1517 unsigned int npeel_tmp;
1518 bool negative = tree_int_cst_compare (DR_STEP (dr),
1519 size_zero_node) < 0;
1521 /* Save info about DR in the hash table. */
1522 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1523 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1524 htab_create (1, vect_peeling_hash,
1525 vect_peeling_hash_eq, free);
1527 vectype = STMT_VINFO_VECTYPE (stmt_info);
1528 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1529 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1530 TREE_TYPE (DR_REF (dr))));
1531 npeel_tmp = (negative
1532 ? (mis - nelements) : (nelements - mis))
1533 & (nelements - 1);
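/* For example (illustrative): for V4SI (nelements == 4) and a known
misalignment of 12 bytes, mis is 3 elements and npeel_tmp is
(4 - 3) & 3 == 1 scalar iteration. */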
1535 /* For multiple types, it is possible that the bigger type access
1536 will have more than one peeling option. E.g., a loop with two
1537 types: one of size (vector size / 4), and the other one of
1538 size (vector size / 8). The vectorization factor will be 8. If both
1539 accesses are misaligned by 3, the first one needs one scalar
1540 iteration to be aligned, and the second one needs 5. But the
1541 first one will also be aligned by peeling 5 scalar
1542 iterations, and in that case both accesses will be aligned.
1543 Hence, except for the immediate peeling amount, we also want
1544 to try adding a full vector size, as long as we don't exceed the
1545 vectorization factor.
1546 We do this automatically when using the cost model, since we calculate the cost
1547 for every peeling option. */
1548 if (!flag_vect_cost_model)
1549 possible_npeel_number = vf /nelements;
1551 /* Handle the aligned case. We may decide to align some other
1552 access, making DR unaligned. */
1553 if (DR_MISALIGNMENT (dr) == 0)
1555 npeel_tmp = 0;
1556 if (!flag_vect_cost_model)
1557 possible_npeel_number++;
1560 for (j = 0; j < possible_npeel_number; j++)
1562 gcc_assert (npeel_tmp <= vf);
1563 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1564 npeel_tmp += nelements;
1567 all_misalignments_unknown = false;
1568 /* Data-ref that was chosen for the case that all the
1569 misalignments are unknown is not relevant anymore, since we
1570 have a data-ref with known alignment. */
1571 dr0 = NULL;
1573 else
1575 /* If we don't know all the misalignment values, we prefer
1576 peeling for the data-ref that has the maximum number of data-refs
1577 with the same alignment, unless the target prefers to align
1578 stores over loads. */
1579 if (all_misalignments_unknown)
1581 if (same_align_drs_max < VEC_length (dr_p,
1582 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1583 || !dr0)
1585 same_align_drs_max = VEC_length (dr_p,
1586 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1587 dr0 = dr;
1590 if (!first_store && DR_IS_WRITE (dr))
1591 first_store = dr;
1594 /* If there are both known and unknown misaligned accesses in the
1595 loop, we choose peeling amount according to the known
1596 accesses. */
1599 if (!supportable_dr_alignment)
1601 dr0 = dr;
1602 if (!first_store && DR_IS_WRITE (dr))
1603 first_store = dr;
1607 else
1609 if (!aligned_access_p (dr))
1611 if (vect_print_dump_info (REPORT_DETAILS))
1612 fprintf (vect_dump, "vector alignment may not be reachable");
1614 break;
1619 vect_versioning_for_alias_required
1620 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1622 /* Temporarily, if versioning for alias is required, we disable peeling
1623 until we support peeling and versioning. Often peeling for alignment
1624 will require peeling for loop-bound, which in turn requires that we
1625 know how to adjust the loop ivs after the loop. */
1626 if (vect_versioning_for_alias_required
1627 || !vect_can_advance_ivs_p (loop_vinfo)
1628 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1629 do_peeling = false;
1631 if (do_peeling && all_misalignments_unknown
1632 && vect_supportable_dr_alignment (dr0, false))
1635 /* Check if the target requires preferring stores over loads, i.e., if
1636 misaligned stores are more expensive than misaligned loads (taking
1637 drs with the same alignment into account). */
1638 if (first_store && DR_IS_READ (dr0))
1640 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1641 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1642 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1643 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1645 vect_get_data_access_cost (dr0, &load_inside_cost,
1646 &load_outside_cost);
1647 vect_get_data_access_cost (first_store, &store_inside_cost,
1648 &store_outside_cost);
1650 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1651 aligning the load DR0). */
1652 load_inside_penalty = store_inside_cost;
1653 load_outside_penalty = store_outside_cost;
1654 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1655 (vinfo_for_stmt (DR_STMT (first_store))),
1656 i, dr);
1657 i++)
1658 if (DR_IS_READ (dr))
1660 load_inside_penalty += load_inside_cost;
1661 load_outside_penalty += load_outside_cost;
1663 else
1665 load_inside_penalty += store_inside_cost;
1666 load_outside_penalty += store_outside_cost;
1669 /* Calculate the penalty for leaving DR0 unaligned (by
1670 aligning the FIRST_STORE). */
1671 store_inside_penalty = load_inside_cost;
1672 store_outside_penalty = load_outside_cost;
1673 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1674 (vinfo_for_stmt (DR_STMT (dr0))),
1675 i, dr);
1676 i++)
1677 if (DR_IS_READ (dr))
1679 store_inside_penalty += load_inside_cost;
1680 store_outside_penalty += load_outside_cost;
1682 else
1684 store_inside_penalty += store_inside_cost;
1685 store_outside_penalty += store_outside_cost;
1688 if (load_inside_penalty > store_inside_penalty
1689 || (load_inside_penalty == store_inside_penalty
1690 && load_outside_penalty > store_outside_penalty))
1691 dr0 = first_store;
1694 /* In case there are only loads with different unknown misalignments, use
1695 peeling only if it may help to align other accesses in the loop. */
1696 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1697 (vinfo_for_stmt (DR_STMT (dr0))))
1698 && vect_supportable_dr_alignment (dr0, false)
1699 != dr_unaligned_supported)
1700 do_peeling = false;
1703 if (do_peeling && !dr0)
1705 /* Peeling is possible, but there is no data access that is not supported
1706 unless aligned. So we try to choose the best possible peeling. */
1708 /* We should get here only if there are drs with known misalignment. */
1709 gcc_assert (!all_misalignments_unknown);
1711 /* Choose the best peeling from the hash table. */
1712 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1713 if (!dr0 || !npeel)
1714 do_peeling = false;
1717 if (do_peeling)
1719 stmt = DR_STMT (dr0);
1720 stmt_info = vinfo_for_stmt (stmt);
1721 vectype = STMT_VINFO_VECTYPE (stmt_info);
1722 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1724 if (known_alignment_for_access_p (dr0))
1726 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1727 size_zero_node) < 0;
1728 if (!npeel)
1730 /* Since it's known at compile time, compute the number of
1731 iterations in the peeled loop (the peeling factor) for use in
1732 updating DR_MISALIGNMENT values. The peeling factor is the
1733 vectorization factor minus the misalignment as an element
1734 count. */
1735 mis = DR_MISALIGNMENT (dr0);
1736 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1737 npeel = ((negative ? mis - nelements : nelements - mis)
1738 & (nelements - 1));
1741 /* For interleaved data access every iteration accesses all the
1742 members of the group, therefore we divide the number of iterations
1743 by the group size. */
1744 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1745 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1746 npeel /= GROUP_SIZE (stmt_info);
1748 if (vect_print_dump_info (REPORT_DETAILS))
1749 fprintf (vect_dump, "Try peeling by %d", npeel);
1752 /* Ensure that all data refs can be vectorized after the peel. */
1753 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1755 int save_misalignment;
1757 if (dr == dr0)
1758 continue;
1760 stmt = DR_STMT (dr);
1761 stmt_info = vinfo_for_stmt (stmt);
1762 /* For interleaving, only the alignment of the first access
1763 matters. */
1764 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1765 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1766 continue;
1768 save_misalignment = DR_MISALIGNMENT (dr);
1769 vect_update_misalignment_for_peel (dr, dr0, npeel);
1770 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1771 SET_DR_MISALIGNMENT (dr, save_misalignment);
1773 if (!supportable_dr_alignment)
1775 do_peeling = false;
1776 break;
1780 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1782 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1783 if (!stat)
1784 do_peeling = false;
1785 else
1786 return stat;
1789 if (do_peeling)
1791 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1792 If the misalignment of DR_i is identical to that of dr0 then set
1793 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1794 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1795 by the peeling factor times the element size of DR_i (MOD the
1796 vectorization factor times the size). Otherwise, the
1797 misalignment of DR_i must be set to unknown. */
1798 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1799 if (dr != dr0)
1800 vect_update_misalignment_for_peel (dr, dr0, npeel);
1802 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1803 if (npeel)
1804 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1805 else
1806 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1807 SET_DR_MISALIGNMENT (dr0, 0);
1808 if (vect_print_dump_info (REPORT_ALIGNMENT))
1809 fprintf (vect_dump, "Alignment of access forced using peeling.");
1811 if (vect_print_dump_info (REPORT_DETAILS))
1812 fprintf (vect_dump, "Peeling for alignment will be applied.");
1814 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1815 gcc_assert (stat);
1816 return stat;
1821 /* (2) Versioning to force alignment. */
1823 /* Try versioning if:
1824 1) flag_tree_vect_loop_version is TRUE
1825 2) optimize loop for speed
1826 3) there is at least one unsupported misaligned data ref with an unknown
1827 misalignment, and
1828 4) all misaligned data refs with a known misalignment are supported, and
1829 5) the number of runtime alignment checks is within reason. */
1831 do_versioning =
1832 flag_tree_vect_loop_version
1833 && optimize_loop_nest_for_speed_p (loop)
1834 && (!loop->inner); /* FORNOW */
1836 if (do_versioning)
1838 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1840 stmt = DR_STMT (dr);
1841 stmt_info = vinfo_for_stmt (stmt);
1843 /* For interleaving, only the alignment of the first access
1844 matters. */
1845 if (aligned_access_p (dr)
1846 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1847 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1848 continue;
1850 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1852 if (!supportable_dr_alignment)
1854 gimple stmt;
1855 int mask;
1856 tree vectype;
1858 if (known_alignment_for_access_p (dr)
1859 || VEC_length (gimple,
1860 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1861 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1863 do_versioning = false;
1864 break;
1867 stmt = DR_STMT (dr);
1868 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1869 gcc_assert (vectype);
1871 /* The rightmost bits of an aligned address must be zeros.
1872 Construct the mask needed for this test. For example,
1873 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1874 mask must be 15 = 0xf. */
1875 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1877 /* FORNOW: use the same mask to test all potentially unaligned
1878 references in the loop. The vectorizer currently supports
1879 a single vector size, see the reference to
1880 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1881 vectorization factor is computed. */
1882 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1883 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1884 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1885 VEC_safe_push (gimple, heap,
1886 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1887 DR_STMT (dr));
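/* The statements collected here are later used to emit a runtime alignment
   check of (roughly) the form ((addr_1 | addr_2 | ...) & mask) == 0 that
   guards the vectorized loop version (editorial note; see
   vect_create_cond_for_align_checks).  */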
1891 /* Versioning requires at least one misaligned data reference. */
1892 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1893 do_versioning = false;
1894 else if (!do_versioning)
1895 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1898 if (do_versioning)
1900 VEC(gimple,heap) *may_misalign_stmts
1901 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1902 gimple stmt;
1904 /* It can now be assumed that the data references in the statements
1905 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1906 of the loop being vectorized. */
1907 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1909 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1910 dr = STMT_VINFO_DATA_REF (stmt_info);
1911 SET_DR_MISALIGNMENT (dr, 0);
1912 if (vect_print_dump_info (REPORT_ALIGNMENT))
1913 fprintf (vect_dump, "Alignment of access forced using versioning.");
1916 if (vect_print_dump_info (REPORT_DETAILS))
1917 fprintf (vect_dump, "Versioning for alignment will be applied.");
1919 /* Peeling and versioning can't be done together at this time. */
1920 gcc_assert (! (do_peeling && do_versioning));
1922 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1923 gcc_assert (stat);
1924 return stat;
1927 /* This point is reached if neither peeling nor versioning is being done. */
1928 gcc_assert (! (do_peeling || do_versioning));
1930 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1931 return stat;
1935 /* Function vect_find_same_alignment_drs.
1937 Update group and alignment relations according to the chosen
1938 vectorization factor. */
1940 static void
1941 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1942 loop_vec_info loop_vinfo)
1944 unsigned int i;
1945 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1946 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1947 struct data_reference *dra = DDR_A (ddr);
1948 struct data_reference *drb = DDR_B (ddr);
1949 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1950 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1951 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1952 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1953 lambda_vector dist_v;
1954 unsigned int loop_depth;
1956 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1957 return;
1959 if (dra == drb)
1960 return;
1962 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1963 return;
1965 /* Loop-based vectorization and known data dependence. */
1966 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1967 return;
1969 /* Data-dependence analysis reports a distance vector of zero
1970 for data-references that overlap only in the first iteration
1971 but have steps of opposite sign (see PR45764).
1972 So as a sanity check require equal DR_STEP. */
1973 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1974 return;
1976 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1977 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1979 int dist = dist_v[loop_depth];
1981 if (vect_print_dump_info (REPORT_DR_DETAILS))
1982 fprintf (vect_dump, "dependence distance = %d.", dist);
1984 /* Same loop iteration. */
1985 if (dist == 0
1986 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1988 /* Two references with distance zero have the same alignment. */
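/* The same holds when the distance is a nonzero multiple of the
   vectorization factor: e.g. with VF = 4 and equal steps and element
   sizes, references 4 (or 8, 12, ...) iterations apart advance by whole
   vectors relative to each other, so they share the same misalignment
   (editorial note).  */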
1989 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1990 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1991 if (vect_print_dump_info (REPORT_ALIGNMENT))
1992 fprintf (vect_dump, "accesses have the same alignment.");
1993 if (vect_print_dump_info (REPORT_DR_DETAILS))
1995 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1996 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1997 fprintf (vect_dump, " and ");
1998 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
2005 /* Function vect_analyze_data_refs_alignment
2007 Analyze the alignment of the data-references in the loop.
2008 Return FALSE if a data reference is found that cannot be vectorized. */
2010 bool
2011 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2012 bb_vec_info bb_vinfo)
2014 if (vect_print_dump_info (REPORT_DETAILS))
2015 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2017 /* Mark groups of data references with same alignment using
2018 data dependence information. */
2019 if (loop_vinfo)
2021 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2022 struct data_dependence_relation *ddr;
2023 unsigned int i;
2025 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2026 vect_find_same_alignment_drs (ddr, loop_vinfo);
2029 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2031 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2032 fprintf (vect_dump,
2033 "not vectorized: can't calculate alignment for data ref.");
2034 return false;
2037 return true;
2041 /* Analyze groups of strided accesses: check that DR belongs to a group of
2042 strided accesses of legal size, step, etc. Detect gaps, single element
2043 interleaving, and other special cases. Set strided access info.
2044 Collect groups of strided stores for further use in SLP analysis. */
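/* Editorial examples: a loop body containing
     a[2*i] = x;  a[2*i+1] = y;
   forms one interleaving group of size 2 whose DR_STEP is twice the
   element size, while a loop that only reads a[4*i] is detected below as
   single-element interleaving of group size 4 (a load with gaps).  */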
2046 static bool
2047 vect_analyze_group_access (struct data_reference *dr)
2049 tree step = DR_STEP (dr);
2050 tree scalar_type = TREE_TYPE (DR_REF (dr));
2051 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2052 gimple stmt = DR_STMT (dr);
2053 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2054 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2055 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2056 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2057 HOST_WIDE_INT stride, last_accessed_element = 1;
2058 bool slp_impossible = false;
2060 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2061 interleaving group (including gaps). */
2062 stride = dr_step / type_size;
2064 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2065 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2067 /* Check whether this DR is part of an interleaving group of which it is
2068 the only element accessed in the loop. */
2070 /* Gaps are supported only for loads. STEP must be a multiple of the type
2071 size. The size of the group must be a power of 2. */
2072 if (DR_IS_READ (dr)
2073 && (dr_step % type_size) == 0
2074 && stride > 0
2075 && exact_log2 (stride) != -1)
2077 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2078 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2079 if (vect_print_dump_info (REPORT_DR_DETAILS))
2081 fprintf (vect_dump, "Detected single element interleaving ");
2082 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2083 fprintf (vect_dump, " step ");
2084 print_generic_expr (vect_dump, step, TDF_SLIM);
2087 if (loop_vinfo)
2089 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2091 if (vect_print_dump_info (REPORT_DETAILS))
2092 fprintf (vect_dump, "Data access with gaps requires scalar "
2093 "epilogue loop");
2096 return true;
2099 if (vect_print_dump_info (REPORT_DETAILS))
2101 fprintf (vect_dump, "not consecutive access ");
2102 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2105 if (bb_vinfo)
2107 /* Mark the statement as unvectorizable. */
2108 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2109 return true;
2112 return false;
2115 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2117 /* First stmt in the interleaving chain. Check the chain. */
2118 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2119 struct data_reference *data_ref = dr;
2120 unsigned int count = 1;
2121 tree next_step;
2122 tree prev_init = DR_INIT (data_ref);
2123 gimple prev = stmt;
2124 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2126 while (next)
2128 /* Skip identical data-refs. If two or more stmts share a
2129 data-ref (supported only for loads), we vectorize only the first
2130 stmt, and the rest get their vectorized loads from the first
2131 one. */
2132 if (!tree_int_cst_compare (DR_INIT (data_ref),
2133 DR_INIT (STMT_VINFO_DATA_REF (
2134 vinfo_for_stmt (next)))))
2136 if (DR_IS_WRITE (data_ref))
2138 if (vect_print_dump_info (REPORT_DETAILS))
2139 fprintf (vect_dump, "Two store stmts share the same dr.");
2140 return false;
2143 /* Check that there are no load-store dependences for these loads,
2144 to prevent a load-store-load sequence to the same location. */
2145 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2146 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2148 if (vect_print_dump_info (REPORT_DETAILS))
2149 fprintf (vect_dump,
2150 "READ_WRITE dependence in interleaving.");
2151 return false;
2154 /* For loads, use the same data-ref load. */
2155 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2157 prev = next;
2158 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2159 continue;
2162 prev = next;
2164 /* Check that all the accesses have the same STEP. */
2165 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2166 if (tree_int_cst_compare (step, next_step))
2168 if (vect_print_dump_info (REPORT_DETAILS))
2169 fprintf (vect_dump, "not consecutive access in interleaving");
2170 return false;
2173 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2174 /* Check that the distance between two accesses is equal to the type
2175 size. Otherwise, we have gaps. */
2176 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2177 - TREE_INT_CST_LOW (prev_init)) / type_size;
2178 if (diff != 1)
2180 /* FORNOW: SLP of accesses with gaps is not supported. */
2181 slp_impossible = true;
2182 if (DR_IS_WRITE (data_ref))
2184 if (vect_print_dump_info (REPORT_DETAILS))
2185 fprintf (vect_dump, "interleaved store with gaps");
2186 return false;
2189 gaps += diff - 1;
2192 last_accessed_element += diff;
2194 /* Store the gap from the previous member of the group. If there is no
2195 gap in the access, GROUP_GAP is always 1. */
2196 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2198 prev_init = DR_INIT (data_ref);
2199 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2200 /* Count the number of data-refs in the chain. */
2201 count++;
2204 /* COUNT is the number of accesses found; we multiply it by the size of
2205 the type to get COUNT_IN_BYTES. */
2206 count_in_bytes = type_size * count;
2208 /* Check that the size of the interleaving (including gaps) is not
2209 greater than STEP. */
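/* E.g. (editorial illustration) four int accesses (type_size = 4) with a
   single one-element gap give count_in_bytes = 16 and gaps = 1, so
   DR_STEP must be at least 20 bytes for the group to be accepted here.  */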
2210 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2212 if (vect_print_dump_info (REPORT_DETAILS))
2214 fprintf (vect_dump, "interleaving size is greater than step for ");
2215 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2217 return false;
2220 /* Check that the size of the interleaving is equal to STEP for stores,
2221 i.e., that there are no gaps. */
2222 if (dr_step && dr_step != count_in_bytes)
2224 if (DR_IS_READ (dr))
2226 slp_impossible = true;
2227 /* There is a gap after the last load in the group. This gap is the
2228 difference between the stride and the number of elements. When there
2229 is no gap, this difference should be 0. */
2230 GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2232 else
2234 if (vect_print_dump_info (REPORT_DETAILS))
2235 fprintf (vect_dump, "interleaved store with gaps");
2236 return false;
2240 /* Check that STEP is a multiple of type size. */
2241 if (dr_step && (dr_step % type_size) != 0)
2243 if (vect_print_dump_info (REPORT_DETAILS))
2245 fprintf (vect_dump, "step is not a multiple of type size: step ");
2246 print_generic_expr (vect_dump, step, TDF_SLIM);
2247 fprintf (vect_dump, " size ");
2248 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2249 TDF_SLIM);
2251 return false;
2254 if (stride == 0)
2255 stride = count;
2257 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2258 if (vect_print_dump_info (REPORT_DETAILS))
2259 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2261 /* SLP: create an SLP data structure for every interleaving group of
2262 stores for further analysis in vect_analyze_slp. */
2263 if (DR_IS_WRITE (dr) && !slp_impossible)
2265 if (loop_vinfo)
2266 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2267 stmt);
2268 if (bb_vinfo)
2269 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2270 stmt);
2273 /* There is a gap at the end of the group. */
2274 if (stride - last_accessed_element > 0 && loop_vinfo)
2276 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2277 if (vect_print_dump_info (REPORT_DETAILS))
2278 fprintf (vect_dump, "Data access with gaps requires scalar "
2279 "epilogue loop");
2283 return true;
2287 /* Analyze the access pattern of the data-reference DR.
2288 In case of non-consecutive accesses call vect_analyze_group_access() to
2289 analyze groups of strided accesses. */
2291 static bool
2292 vect_analyze_data_ref_access (struct data_reference *dr)
2294 tree step = DR_STEP (dr);
2295 tree scalar_type = TREE_TYPE (DR_REF (dr));
2296 gimple stmt = DR_STMT (dr);
2297 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2298 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2299 struct loop *loop = NULL;
2300 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2302 if (loop_vinfo)
2303 loop = LOOP_VINFO_LOOP (loop_vinfo);
2305 if (loop_vinfo && !step)
2307 if (vect_print_dump_info (REPORT_DETAILS))
2308 fprintf (vect_dump, "bad data-ref access in loop");
2309 return false;
2312 /* Allow invariant loads in loops. */
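/* A zero step means the same address is read in every iteration (e.g.
   'x = *p' with p unchanged inside the loop); invariant stores are not
   handled here, hence only reads are accepted (editorial note).  */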
2313 if (loop_vinfo && dr_step == 0)
2315 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2316 return DR_IS_READ (dr);
2319 if (loop && nested_in_vect_loop_p (loop, stmt))
2321 /* Interleaved accesses are not yet supported within outer-loop
2322 vectorization for references in the inner-loop. */
2323 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2325 /* For the rest of the analysis we use the outer-loop step. */
2326 step = STMT_VINFO_DR_STEP (stmt_info);
2327 dr_step = TREE_INT_CST_LOW (step);
2329 if (dr_step == 0)
2331 if (vect_print_dump_info (REPORT_ALIGNMENT))
2332 fprintf (vect_dump, "zero step in outer loop.");
2333 if (DR_IS_READ (dr))
2334 return true;
2335 else
2336 return false;
2340 /* Consecutive? */
2341 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2342 || (dr_step < 0
2343 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2345 /* Mark that it is not interleaving. */
2346 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2347 return true;
2350 if (loop && nested_in_vect_loop_p (loop, stmt))
2352 if (vect_print_dump_info (REPORT_ALIGNMENT))
2353 fprintf (vect_dump, "strided access in outer loop.");
2354 return false;
2357 /* Non-consecutive access - check whether it is part of an interleaving group. */
2358 return vect_analyze_group_access (dr);
2362 /* Function vect_analyze_data_ref_accesses.
2364 Analyze the access pattern of all the data references in the loop.
2366 FORNOW: the only access pattern that is considered vectorizable is a
2367 simple step 1 (consecutive) access.
2369 FORNOW: handle only arrays and pointer accesses. */
2371 bool
2372 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2374 unsigned int i;
2375 VEC (data_reference_p, heap) *datarefs;
2376 struct data_reference *dr;
2378 if (vect_print_dump_info (REPORT_DETAILS))
2379 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2381 if (loop_vinfo)
2382 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2383 else
2384 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2386 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2387 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2388 && !vect_analyze_data_ref_access (dr))
2390 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2391 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2393 if (bb_vinfo)
2395 /* Mark the statement as not vectorizable. */
2396 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2397 continue;
2399 else
2400 return false;
2403 return true;
2406 /* Function vect_prune_runtime_alias_test_list.
2408 Prune a list of ddrs to be tested at run-time by versioning for alias.
2409 Return FALSE if the resulting list of ddrs is longer than allowed by
2410 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2412 bool
2413 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2415 VEC (ddr_p, heap) * ddrs =
2416 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2417 unsigned i, j;
2419 if (vect_print_dump_info (REPORT_DETAILS))
2420 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2422 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2424 bool found;
2425 ddr_p ddr_i;
2427 ddr_i = VEC_index (ddr_p, ddrs, i);
2428 found = false;
2430 for (j = 0; j < i; j++)
2432 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2434 if (vect_vfa_range_equal (ddr_i, ddr_j))
2436 if (vect_print_dump_info (REPORT_DR_DETAILS))
2438 fprintf (vect_dump, "found equal ranges ");
2439 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2440 fprintf (vect_dump, ", ");
2441 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2442 fprintf (vect_dump, " and ");
2443 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2444 fprintf (vect_dump, ", ");
2445 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2447 found = true;
2448 break;
2452 if (found)
2454 VEC_ordered_remove (ddr_p, ddrs, i);
2455 continue;
2457 i++;
2460 if (VEC_length (ddr_p, ddrs) >
2461 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2463 if (vect_print_dump_info (REPORT_DR_DETAILS))
2465 fprintf (vect_dump,
2466 "disable versioning for alias - max number of generated "
2467 "checks exceeded.");
2470 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2472 return false;
2475 return true;
2479 /* Function vect_analyze_data_refs.
2481 Find all the data references in the loop or basic block.
2483 The general structure of the analysis of data refs in the vectorizer is as
2484 follows:
2485 1- vect_analyze_data_refs(loop/bb): call
2486 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2487 in the loop/bb and their dependences.
2488 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2489 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2490 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2494 bool
2495 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2496 bb_vec_info bb_vinfo,
2497 int *min_vf)
2499 struct loop *loop = NULL;
2500 basic_block bb = NULL;
2501 unsigned int i;
2502 VEC (data_reference_p, heap) *datarefs;
2503 struct data_reference *dr;
2504 tree scalar_type;
2505 bool res;
2507 if (vect_print_dump_info (REPORT_DETAILS))
2508 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2510 if (loop_vinfo)
2512 loop = LOOP_VINFO_LOOP (loop_vinfo);
2513 res = compute_data_dependences_for_loop
2514 (loop, true,
2515 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2516 &LOOP_VINFO_DATAREFS (loop_vinfo),
2517 &LOOP_VINFO_DDRS (loop_vinfo));
2519 if (!res)
2521 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2522 fprintf (vect_dump, "not vectorized: loop contains function calls"
2523 " or data references that cannot be analyzed");
2524 return false;
2527 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2529 else
2531 bb = BB_VINFO_BB (bb_vinfo);
2532 res = compute_data_dependences_for_bb (bb, true,
2533 &BB_VINFO_DATAREFS (bb_vinfo),
2534 &BB_VINFO_DDRS (bb_vinfo));
2535 if (!res)
2537 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2538 fprintf (vect_dump, "not vectorized: basic block contains function"
2539 " calls or data references that cannot be analyzed");
2540 return false;
2543 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2546 /* Go through the data-refs, check that the analysis succeeded. Update
2547 pointer from stmt_vec_info struct to DR and vectype. */
2549 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2551 gimple stmt;
2552 stmt_vec_info stmt_info;
2553 tree base, offset, init;
2554 int vf;
2556 if (!dr || !DR_REF (dr))
2558 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2559 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2560 return false;
2563 stmt = DR_STMT (dr);
2564 stmt_info = vinfo_for_stmt (stmt);
2566 /* Check that analysis of the data-ref succeeded. */
2567 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2568 || !DR_STEP (dr))
2570 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2572 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2573 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2576 if (bb_vinfo)
2578 /* Mark the statement as not vectorizable. */
2579 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2580 continue;
2582 else
2583 return false;
2586 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2588 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2589 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2590 "constant");
2591 if (bb_vinfo)
2593 /* Mark the statement as not vectorizable. */
2594 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2595 continue;
2597 else
2598 return false;
2601 if (TREE_THIS_VOLATILE (DR_REF (dr)))
2603 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2605 fprintf (vect_dump, "not vectorized: volatile type ");
2606 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2608 return false;
2611 base = unshare_expr (DR_BASE_ADDRESS (dr));
2612 offset = unshare_expr (DR_OFFSET (dr));
2613 init = unshare_expr (DR_INIT (dr));
2615 if (stmt_can_throw_internal (stmt))
2617 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2619 fprintf (vect_dump, "not vectorized: statement can throw an "
2620 "exception ");
2621 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2623 return false;
2626 /* Update DR field in stmt_vec_info struct. */
2628 /* If the dataref is in an inner-loop of the loop that is considered for
2629 vectorization, we also want to analyze the access relative to
2630 the outer-loop (DR contains information only relative to the
2631 inner-most enclosing loop). We do that by building a reference to the
2632 first location accessed by the inner-loop, and analyze it relative to
2633 the outer-loop. */
2634 if (loop && nested_in_vect_loop_p (loop, stmt))
2636 tree outer_step, outer_base, outer_init;
2637 HOST_WIDE_INT pbitsize, pbitpos;
2638 tree poffset;
2639 enum machine_mode pmode;
2640 int punsignedp, pvolatilep;
2641 affine_iv base_iv, offset_iv;
2642 tree dinit;
2644 /* Build a reference to the first location accessed by the
2645 inner-loop: *(BASE+INIT). (The first location is actually
2646 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2647 tree inner_base = build_fold_indirect_ref
2648 (fold_build_pointer_plus (base, init));
2650 if (vect_print_dump_info (REPORT_DETAILS))
2652 fprintf (vect_dump, "analyze in outer-loop: ");
2653 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2656 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2657 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2658 gcc_assert (outer_base != NULL_TREE);
2660 if (pbitpos % BITS_PER_UNIT != 0)
2662 if (vect_print_dump_info (REPORT_DETAILS))
2663 fprintf (vect_dump, "failed: bit offset alignment.\n");
2664 return false;
2667 outer_base = build_fold_addr_expr (outer_base);
2668 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2669 &base_iv, false))
2671 if (vect_print_dump_info (REPORT_DETAILS))
2672 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2673 return false;
2676 if (offset)
2678 if (poffset)
2679 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2680 poffset);
2681 else
2682 poffset = offset;
2685 if (!poffset)
2687 offset_iv.base = ssize_int (0);
2688 offset_iv.step = ssize_int (0);
2690 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2691 &offset_iv, false))
2693 if (vect_print_dump_info (REPORT_DETAILS))
2694 fprintf (vect_dump, "evolution of offset is not affine.\n");
2695 return false;
2698 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2699 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2700 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2701 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2702 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2704 outer_step = size_binop (PLUS_EXPR,
2705 fold_convert (ssizetype, base_iv.step),
2706 fold_convert (ssizetype, offset_iv.step));
2708 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2709 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2710 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2711 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2712 STMT_VINFO_DR_OFFSET (stmt_info) =
2713 fold_convert (ssizetype, offset_iv.base);
2714 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2715 size_int (highest_pow2_factor (offset_iv.base));
2717 if (vect_print_dump_info (REPORT_DETAILS))
2719 fprintf (vect_dump, "\touter base_address: ");
2720 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2721 fprintf (vect_dump, "\n\touter offset from base address: ");
2722 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2723 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2724 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2725 fprintf (vect_dump, "\n\touter step: ");
2726 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2727 fprintf (vect_dump, "\n\touter aligned to: ");
2728 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2732 if (STMT_VINFO_DATA_REF (stmt_info))
2734 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2736 fprintf (vect_dump,
2737 "not vectorized: more than one data ref in stmt: ");
2738 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2740 return false;
2743 STMT_VINFO_DATA_REF (stmt_info) = dr;
2745 /* Set vectype for STMT. */
2746 scalar_type = TREE_TYPE (DR_REF (dr));
2747 STMT_VINFO_VECTYPE (stmt_info) =
2748 get_vectype_for_scalar_type (scalar_type);
2749 if (!STMT_VINFO_VECTYPE (stmt_info))
2751 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2753 fprintf (vect_dump,
2754 "not vectorized: no vectype for stmt: ");
2755 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2756 fprintf (vect_dump, " scalar_type: ");
2757 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2760 if (bb_vinfo)
2762 /* Mark the statement as not vectorizable. */
2763 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2764 continue;
2766 else
2767 return false;
2770 /* Adjust the minimal vectorization factor according to the
2771 vector type. */
2772 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2773 if (vf > *min_vf)
2774 *min_vf = vf;
2777 return true;
2781 /* Function vect_get_new_vect_var.
2783 Returns a name for a new variable. The current naming scheme prepends the
2784 prefix "vect_", "stmp_", or "vect_p" (depending on the value of VAR_KIND)
2785 to NAME if it is provided, and otherwise uses the bare prefix as the
2786 name. */
2788 tree
2789 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2791 const char *prefix;
2792 tree new_vect_var;
2794 switch (var_kind)
2796 case vect_simple_var:
2797 prefix = "vect_";
2798 break;
2799 case vect_scalar_var:
2800 prefix = "stmp_";
2801 break;
2802 case vect_pointer_var:
2803 prefix = "vect_p";
2804 break;
2805 default:
2806 gcc_unreachable ();
2809 if (name)
2811 char* tmp = concat (prefix, name, NULL);
2812 new_vect_var = create_tmp_var (type, tmp);
2813 free (tmp);
2815 else
2816 new_vect_var = create_tmp_var (type, prefix);
2818 /* Mark vector typed variable as a gimple register variable. */
2819 if (TREE_CODE (type) == VECTOR_TYPE)
2820 DECL_GIMPLE_REG_P (new_vect_var) = true;
2822 return new_vect_var;
2826 /* Function vect_create_addr_base_for_vector_ref.
2828 Create an expression that computes the address of the first memory location
2829 that will be accessed for a data reference.
2831 Input:
2832 STMT: The statement containing the data reference.
2833 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2834 OFFSET: Optional. If supplied, it is added to the initial address.
2835 LOOP: Specify relative to which loop nest the address should be computed.
2836 For example, when the dataref is in an inner-loop nested in an
2837 outer-loop that is now being vectorized, LOOP can be either the
2838 outer-loop, or the inner-loop. The first memory location accessed
2839 by the following dataref ('in' points to short):
2841 for (i=0; i<N; i++)
2842 for (j=0; j<M; j++)
2843 s += in[i+j]
2845 is as follows:
2846 if LOOP=i_loop: &in (relative to i_loop)
2847 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2849 Output:
2850 1. Return an SSA_NAME whose value is the address of the memory location of
2851 the first vector of the data reference.
2852 2. If NEW_STMT_LIST is not NULL_TREE after return, the caller must insert
2853 these statement(s), which define the returned SSA_NAME.
2855 FORNOW: We are only handling array accesses with step 1. */
2857 tree
2858 vect_create_addr_base_for_vector_ref (gimple stmt,
2859 gimple_seq *new_stmt_list,
2860 tree offset,
2861 struct loop *loop)
2863 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2864 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2865 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2866 tree base_name;
2867 tree data_ref_base_var;
2868 tree vec_stmt;
2869 tree addr_base, addr_expr;
2870 tree dest;
2871 gimple_seq seq = NULL;
2872 tree base_offset = unshare_expr (DR_OFFSET (dr));
2873 tree init = unshare_expr (DR_INIT (dr));
2874 tree vect_ptr_type;
2875 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2876 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2877 tree base;
2879 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2881 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2883 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2885 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2886 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2887 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2890 if (loop_vinfo)
2891 base_name = build_fold_indirect_ref (data_ref_base);
2892 else
2894 base_offset = ssize_int (0);
2895 init = ssize_int (0);
2896 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2899 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2900 add_referenced_var (data_ref_base_var);
2901 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2902 data_ref_base_var);
2903 gimple_seq_add_seq (new_stmt_list, seq);
2905 /* Create base_offset */
2906 base_offset = size_binop (PLUS_EXPR,
2907 fold_convert (sizetype, base_offset),
2908 fold_convert (sizetype, init));
2909 dest = create_tmp_var (sizetype, "base_off");
2910 add_referenced_var (dest);
2911 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2912 gimple_seq_add_seq (new_stmt_list, seq);
2914 if (offset)
2916 tree tmp = create_tmp_var (sizetype, "offset");
2918 add_referenced_var (tmp);
2919 offset = fold_build2 (MULT_EXPR, sizetype,
2920 fold_convert (sizetype, offset), step);
2921 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2922 base_offset, offset);
2923 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2924 gimple_seq_add_seq (new_stmt_list, seq);
2927 /* base + base_offset */
2928 if (loop_vinfo)
2929 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
2930 else
2932 addr_base = build1 (ADDR_EXPR,
2933 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2934 unshare_expr (DR_REF (dr)));
2937 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2938 base = get_base_address (DR_REF (dr));
2939 if (base
2940 && TREE_CODE (base) == MEM_REF)
2941 vect_ptr_type
2942 = build_qualified_type (vect_ptr_type,
2943 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2945 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2946 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2947 get_name (base_name));
2948 add_referenced_var (addr_expr);
2949 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2950 gimple_seq_add_seq (new_stmt_list, seq);
2952 if (DR_PTR_INFO (dr)
2953 && TREE_CODE (vec_stmt) == SSA_NAME)
2955 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2956 if (offset)
2958 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2959 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2963 if (vect_print_dump_info (REPORT_DETAILS))
2965 fprintf (vect_dump, "created ");
2966 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2969 return vec_stmt;
2973 /* Function vect_create_data_ref_ptr.
2975 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
2976 location accessed in the loop by STMT, along with the def-use update
2977 chain to appropriately advance the pointer through the loop iterations.
2978 Also set aliasing information for the pointer. This pointer is used by
2979 the callers to this function to create a memory reference expression for
2980 vector load/store access.
2982 Input:
2983 1. STMT: a stmt that references memory. Expected to be of the form
2984 GIMPLE_ASSIGN <name, data-ref> or
2985 GIMPLE_ASSIGN <data-ref, name>.
2986 2. AGGR_TYPE: the type of the reference, which should be either a vector
2987 or an array.
2988 3. AT_LOOP: the loop where the vector memref is to be created.
2989 4. OFFSET (optional): an offset to be added to the initial address accessed
2990 by the data-ref in STMT.
2991 5. BSI: location where the new stmts are to be placed if there is no loop
2992 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
2993 pointing to the initial address.
2995 Output:
2996 1. Declare a new ptr to vector_type, and have it point to the base of the
2997 data reference (the initial address accessed by the data reference).
2998 For example, for vector of type V8HI, the following code is generated:
3000 v8hi *ap;
3001 ap = (v8hi *)initial_address;
3003 if OFFSET is not supplied:
3004 initial_address = &a[init];
3005 if OFFSET is supplied:
3006 initial_address = &a[init + OFFSET];
3008 Return the initial_address in INITIAL_ADDRESS.
3010 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3011 update the pointer in each iteration of the loop.
3013 Return the increment stmt that updates the pointer in PTR_INCR.
3015 3. Set INV_P to true if the access pattern of the data reference in the
3016 vectorized loop is invariant. Set it to false otherwise.
3018 4. Return the pointer. */
3020 tree
3021 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3022 tree offset, tree *initial_address,
3023 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3024 bool only_init, bool *inv_p)
3026 tree base_name;
3027 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3028 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3029 struct loop *loop = NULL;
3030 bool nested_in_vect_loop = false;
3031 struct loop *containing_loop = NULL;
3032 tree aggr_ptr_type;
3033 tree aggr_ptr;
3034 tree new_temp;
3035 gimple vec_stmt;
3036 gimple_seq new_stmt_list = NULL;
3037 edge pe = NULL;
3038 basic_block new_bb;
3039 tree aggr_ptr_init;
3040 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3041 tree aptr;
3042 gimple_stmt_iterator incr_gsi;
3043 bool insert_after;
3044 bool negative;
3045 tree indx_before_incr, indx_after_incr;
3046 gimple incr;
3047 tree step;
3048 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3049 tree base;
3051 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3052 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3054 if (loop_vinfo)
3056 loop = LOOP_VINFO_LOOP (loop_vinfo);
3057 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3058 containing_loop = (gimple_bb (stmt))->loop_father;
3059 pe = loop_preheader_edge (loop);
3061 else
3063 gcc_assert (bb_vinfo);
3064 only_init = true;
3065 *ptr_incr = NULL;
3068 /* Check the step (evolution) of the load in LOOP, and record
3069 whether it's invariant. */
3070 if (nested_in_vect_loop)
3071 step = STMT_VINFO_DR_STEP (stmt_info);
3072 else
3073 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3075 if (tree_int_cst_compare (step, size_zero_node) == 0)
3076 *inv_p = true;
3077 else
3078 *inv_p = false;
3079 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3081 /* Create an expression for the first address accessed by this load
3082 in LOOP. */
3083 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3085 if (vect_print_dump_info (REPORT_DETAILS))
3087 tree data_ref_base = base_name;
3088 fprintf (vect_dump, "create %s-pointer variable to type: ",
3089 tree_code_name[(int) TREE_CODE (aggr_type)]);
3090 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3091 if (TREE_CODE (data_ref_base) == VAR_DECL
3092 || TREE_CODE (data_ref_base) == ARRAY_REF)
3093 fprintf (vect_dump, " vectorizing an array ref: ");
3094 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3095 fprintf (vect_dump, " vectorizing a record based array ref: ");
3096 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3097 fprintf (vect_dump, " vectorizing a pointer ref: ");
3098 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3101 /* (1) Create the new aggregate-pointer variable. */
3102 aggr_ptr_type = build_pointer_type (aggr_type);
3103 base = get_base_address (DR_REF (dr));
3104 if (base
3105 && TREE_CODE (base) == MEM_REF)
3106 aggr_ptr_type
3107 = build_qualified_type (aggr_ptr_type,
3108 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3109 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3110 get_name (base_name));
3112 /* Vector and array types inherit the alias set of their component
3113 type by default so we need to use a ref-all pointer if the data
3114 reference does not conflict with the created aggregated data
3115 reference because it is not addressable. */
3116 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3117 get_alias_set (DR_REF (dr))))
3119 aggr_ptr_type
3120 = build_pointer_type_for_mode (aggr_type,
3121 TYPE_MODE (aggr_ptr_type), true);
3122 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3123 get_name (base_name));
3126 /* Likewise for any of the data references in the stmt group. */
3127 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3129 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3132 tree lhs = gimple_assign_lhs (orig_stmt);
3133 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3134 get_alias_set (lhs)))
3136 aggr_ptr_type
3137 = build_pointer_type_for_mode (aggr_type,
3138 TYPE_MODE (aggr_ptr_type), true);
3139 aggr_ptr
3140 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3141 get_name (base_name));
3142 break;
3145 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3147 while (orig_stmt);
3150 add_referenced_var (aggr_ptr);
3152 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3153 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3154 def-use update cycles for the pointer: one relative to the outer-loop
3155 (LOOP), which is what steps (3) and (4) below do. The other is relative
3156 to the inner-loop (which is the inner-most loop containing the dataref),
3157 and this is done by step (5) below.
3159 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3160 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3161 redundant. Steps (3),(4) create the following:
3163 vp0 = &base_addr;
3164 LOOP: vp1 = phi(vp0,vp2)
3167 vp2 = vp1 + step
3168 goto LOOP
3170 If there is an inner-loop nested in loop, then step (5) will also be
3171 applied, and an additional update in the inner-loop will be created:
3173 vp0 = &base_addr;
3174 LOOP: vp1 = phi(vp0,vp2)
3176 inner: vp3 = phi(vp1,vp4)
3177 vp4 = vp3 + inner_step
3178 if () goto inner
3180 vp2 = vp1 + step
3181 if () goto LOOP */
3183 /* (2) Calculate the initial address of the aggregate-pointer, and set
3184 the aggregate-pointer to point to it before the loop. */
3186 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3188 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3189 offset, loop);
3190 if (new_stmt_list)
3192 if (pe)
3194 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3195 gcc_assert (!new_bb);
3197 else
3198 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3201 *initial_address = new_temp;
3203 /* Create: p = (aggr_type *) initial_base */
3204 if (TREE_CODE (new_temp) != SSA_NAME
3205 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3207 vec_stmt = gimple_build_assign (aggr_ptr,
3208 fold_convert (aggr_ptr_type, new_temp));
3209 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3210 /* Copy the points-to information if it exists. */
3211 if (DR_PTR_INFO (dr))
3212 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3213 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3214 if (pe)
3216 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3217 gcc_assert (!new_bb);
3219 else
3220 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3222 else
3223 aggr_ptr_init = new_temp;
3225 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3226 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3227 inner-loop nested in LOOP (during outer-loop vectorization). */
3229 /* No update in loop is required. */
3230 if (only_init && (!loop_vinfo || at_loop == loop))
3231 aptr = aggr_ptr_init;
3232 else
3234 /* The step of the aggregate pointer is the type size. */
3235 tree step = TYPE_SIZE_UNIT (aggr_type);
3236 /* One exception to the above is when the scalar step of the load in
3237 LOOP is zero. In this case the step here is also zero. */
3238 if (*inv_p)
3239 step = size_zero_node;
3240 else if (negative)
3241 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3243 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3245 create_iv (aggr_ptr_init,
3246 fold_convert (aggr_ptr_type, step),
3247 aggr_ptr, loop, &incr_gsi, insert_after,
3248 &indx_before_incr, &indx_after_incr);
3249 incr = gsi_stmt (incr_gsi);
3250 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3252 /* Copy the points-to information if it exists. */
3253 if (DR_PTR_INFO (dr))
3255 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3256 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3258 if (ptr_incr)
3259 *ptr_incr = incr;
3261 aptr = indx_before_incr;
3264 if (!nested_in_vect_loop || only_init)
3265 return aptr;
3268 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3269 nested in LOOP, if it exists. */
3271 gcc_assert (nested_in_vect_loop);
3272 if (!only_init)
3274 standard_iv_increment_position (containing_loop, &incr_gsi,
3275 &insert_after);
3276 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3277 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3278 &indx_after_incr);
3279 incr = gsi_stmt (incr_gsi);
3280 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3282 /* Copy the points-to information if it exists. */
3283 if (DR_PTR_INFO (dr))
3285 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3286 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3288 if (ptr_incr)
3289 *ptr_incr = incr;
3291 return indx_before_incr;
3293 else
3294 gcc_unreachable ();
3298 /* Function bump_vector_ptr
3300 Increment a pointer (to a vector type) by vector-size. If requested,
3301 i.e. if PTR_INCR is given, then also connect the new increment stmt
3302 to the existing def-use update-chain of the pointer, by modifying
3303 the PTR_INCR as illustrated below:
3305 The pointer def-use update-chain before this function:
3306 DATAREF_PTR = phi (p_0, p_2)
3307 ....
3308 PTR_INCR: p_2 = DATAREF_PTR + step
3310 The pointer def-use update-chain after this function:
3311 DATAREF_PTR = phi (p_0, p_2)
3312 ....
3313 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3314 ....
3315 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3317 Input:
3318 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3319 in the loop.
3320 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3321 the loop. The increment amount across iterations is expected
3322 to be vector_size.
3323 BSI - location where the new update stmt is to be placed.
3324 STMT - the original scalar memory-access stmt that is being vectorized.
3325 BUMP - optional. The offset by which to bump the pointer. If not given,
3326 the offset is assumed to be vector_size.
3328 Output: Return NEW_DATAREF_PTR as illustrated above.
3332 tree
3333 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3334 gimple stmt, tree bump)
3336 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3337 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3338 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3339 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3340 tree update = TYPE_SIZE_UNIT (vectype);
3341 gimple incr_stmt;
3342 ssa_op_iter iter;
3343 use_operand_p use_p;
3344 tree new_dataref_ptr;
3346 if (bump)
3347 update = bump;
3349 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3350 dataref_ptr, update);
3351 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3352 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3353 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3355 /* Copy the points-to information if it exists. */
3356 if (DR_PTR_INFO (dr))
3358 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3359 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3360 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3363 if (!ptr_incr)
3364 return new_dataref_ptr;
3366 /* Update the vector-pointer's cross-iteration increment. */
3367 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3369 tree use = USE_FROM_PTR (use_p);
3371 if (use == dataref_ptr)
3372 SET_USE (use_p, new_dataref_ptr);
3373 else
3374 gcc_assert (tree_int_cst_compare (use, update) == 0);
3377 return new_dataref_ptr;
3381 /* Function vect_create_destination_var.
3383 Create a new temporary of type VECTYPE. */
3385 tree
3386 vect_create_destination_var (tree scalar_dest, tree vectype)
3388 tree vec_dest;
3389 const char *new_name;
3390 tree type;
3391 enum vect_var_kind kind;
3393 kind = vectype ? vect_simple_var : vect_scalar_var;
3394 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3396 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3398 new_name = get_name (scalar_dest);
3399 if (!new_name)
3400 new_name = "var_";
3401 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3402 add_referenced_var (vec_dest);
3404 return vec_dest;
3407 /* Function vect_strided_store_supported.
3409 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3410 and FALSE otherwise. */
3412 bool
3413 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3415 optab interleave_high_optab, interleave_low_optab;
3416 enum machine_mode mode;
3418 mode = TYPE_MODE (vectype);
3420 /* vect_permute_store_chain requires the group size to be a power of two. */
3421 if (exact_log2 (count) == -1)
3423 if (vect_print_dump_info (REPORT_DETAILS))
3424 fprintf (vect_dump, "the size of the group of strided accesses"
3425 " is not a power of 2");
3426 return false;
3429 /* Check that the operation is supported. */
3430 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3431 vectype, optab_default);
3432 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3433 vectype, optab_default);
3434 if (!interleave_high_optab || !interleave_low_optab)
3436 if (vect_print_dump_info (REPORT_DETAILS))
3437 fprintf (vect_dump, "no optab for interleave.");
3438 return false;
3441 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3442 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3444 if (vect_print_dump_info (REPORT_DETAILS))
3445 fprintf (vect_dump, "interleave op not supported by target.");
3446 return false;
3449 return true;
3453 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3454 type VECTYPE. */
3456 bool
3457 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3459 return vect_lanes_optab_supported_p ("vec_store_lanes",
3460 vec_store_lanes_optab,
3461 vectype, count);
3465 /* Function vect_permute_store_chain.
3467 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3468 a power of 2, generate interleave_high/low stmts to reorder the data
3469 correctly for the stores. Return the final references for stores in
3470 RESULT_CHAIN.
3472 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3473 The input is 4 vectors each containing 8 elements. We assign a number to
3474 each element; the input sequence is:
3476 1st vec: 0 1 2 3 4 5 6 7
3477 2nd vec: 8 9 10 11 12 13 14 15
3478 3rd vec: 16 17 18 19 20 21 22 23
3479 4th vec: 24 25 26 27 28 29 30 31
3481 The output sequence should be:
3483 1st vec: 0 8 16 24 1 9 17 25
3484 2nd vec: 2 10 18 26 3 11 19 27
3485 3rd vec: 4 12 20 28 5 13 21 29
3486 4th vec: 6 14 22 30 7 15 23 31
3488 i.e., we interleave the contents of the four vectors in their order.
3490 We use interleave_high/low instructions to create such output. The input of
3491 each interleave_high/low operation is two vectors:
3492 1st vec 2nd vec
3493 0 1 2 3 4 5 6 7
3494 the even elements of the result vector are obtained left-to-right from the
3495 high/low elements of the first vector. The odd elements of the result are
3496 obtained left-to-right from the high/low elements of the second vector.
3497 The output of interleave_high will be: 0 4 1 5
3498 and of interleave_low: 2 6 3 7
3501 The permutation is done in log LENGTH stages. In each stage interleave_high
3502 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3503 where the first argument is taken from the first half of DR_CHAIN and the
3504 second argument from its second half.
3505 In our example,
3507 I1: interleave_high (1st vec, 3rd vec)
3508 I2: interleave_low (1st vec, 3rd vec)
3509 I3: interleave_high (2nd vec, 4th vec)
3510 I4: interleave_low (2nd vec, 4th vec)
3512 The output for the first stage is:
3514 I1: 0 16 1 17 2 18 3 19
3515 I2: 4 20 5 21 6 22 7 23
3516 I3: 8 24 9 25 10 26 11 27
3517 I4: 12 28 13 29 14 30 15 31
3519 The output of the second stage, i.e. the final result is:
3521 I1: 0 8 16 24 1 9 17 25
3522 I2: 2 10 18 26 3 11 19 27
3523 I3: 4 12 20 28 5 13 21 29
3524 I4: 6 14 22 30 7 15 23 31. */
3526 void
3527 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3528 unsigned int length,
3529 gimple stmt,
3530 gimple_stmt_iterator *gsi,
3531 VEC(tree,heap) **result_chain)
3533 tree perm_dest, vect1, vect2, high, low;
3534 gimple perm_stmt;
3535 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3536 int i;
3537 unsigned int j;
3538 enum tree_code high_code, low_code;
3540 gcc_assert (vect_strided_store_supported (vectype, length));
3542 *result_chain = VEC_copy (tree, heap, dr_chain);
3544 for (i = 0; i < exact_log2 (length); i++)
3546 for (j = 0; j < length/2; j++)
3548 vect1 = VEC_index (tree, dr_chain, j);
3549 vect2 = VEC_index (tree, dr_chain, j+length/2);
3551 /* Create interleaving stmt:
3552 in the case of big endian:
3553 high = interleave_high (vect1, vect2)
3554 and in the case of little endian:
3555 high = interleave_low (vect1, vect2). */
3556 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3557 DECL_GIMPLE_REG_P (perm_dest) = 1;
3558 add_referenced_var (perm_dest);
3559 if (BYTES_BIG_ENDIAN)
3561 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3562 low_code = VEC_INTERLEAVE_LOW_EXPR;
3564 else
3566 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3567 high_code = VEC_INTERLEAVE_LOW_EXPR;
3569 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3570 vect1, vect2);
3571 high = make_ssa_name (perm_dest, perm_stmt);
3572 gimple_assign_set_lhs (perm_stmt, high);
3573 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3574 VEC_replace (tree, *result_chain, 2*j, high);
3576 /* Create interleaving stmt:
3577 in the case of big endian:
3578 low = interleave_low (vect1, vect2)
3579 and in the case of little endian:
3580 low = interleave_high (vect1, vect2). */
3581 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3582 DECL_GIMPLE_REG_P (perm_dest) = 1;
3583 add_referenced_var (perm_dest);
3584 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3585 vect1, vect2);
3586 low = make_ssa_name (perm_dest, perm_stmt);
3587 gimple_assign_set_lhs (perm_stmt, low);
3588 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3589 VEC_replace (tree, *result_chain, 2*j+1, low);
3591 dr_chain = VEC_copy (tree, heap, *result_chain);
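/* Illustrative sketch (not part of GCC, kept under #if 0): the element
   movement performed by the log2(LENGTH) interleave stages documented
   above, modelled on plain integer arrays.  VF and LEN match the example
   in the comment; all names below are hypothetical.  */
#if 0
#include <stdio.h>
#include <string.h>

#define VF 8                   /* elements per vector  */
#define LEN 4                  /* vectors in the chain */

/* interleave_high/interleave_low of two VF-element "vectors":
   high pairs up the first halves, low pairs up the second halves.  */
static void
interleave (const int *v1, const int *v2, int *hi, int *lo)
{
  for (int k = 0; k < VF / 2; k++)
    {
      hi[2 * k]     = v1[k];
      hi[2 * k + 1] = v2[k];
      lo[2 * k]     = v1[VF / 2 + k];
      lo[2 * k + 1] = v2[VF / 2 + k];
    }
}

int
main (void)
{
  int chain[LEN][VF], result[LEN][VF];

  for (int v = 0; v < LEN; v++)
    for (int k = 0; k < VF; k++)
      chain[v][k] = v * VF + k;                 /* elements 0..31 */

  /* log2(LEN) stages; pair vector J with vector J + LEN/2, as in
     vect_permute_store_chain above.  */
  for (int stage = 0; (1 << stage) < LEN; stage++)
    {
      for (int j = 0; j < LEN / 2; j++)
        interleave (chain[j], chain[j + LEN / 2],
                    result[2 * j], result[2 * j + 1]);
      memcpy (chain, result, sizeof chain);
    }

  /* Prints 0 8 16 24 1 9 17 25 / 2 10 18 26 ... as in the comment.  */
  for (int v = 0; v < LEN; v++)
    {
      for (int k = 0; k < VF; k++)
        printf (" %2d", chain[v][k]);
      printf ("\n");
    }
  return 0;
}
#endif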
3595 /* Function vect_setup_realignment
3597 This function is called when vectorizing an unaligned load using
3598 the dr_explicit_realign[_optimized] scheme.
3599 This function generates the following code at the loop prolog:
3601 p = initial_addr;
3602 x msq_init = *(floor(p)); # prolog load
3603 realignment_token = call target_builtin;
3604 loop:
3605 x msq = phi (msq_init, ---)
3607 The stmts marked with x are generated only for the case of
3608 dr_explicit_realign_optimized.
3610 The code above sets up a new (vector) pointer, pointing to the first
3611 location accessed by STMT, and a "floor-aligned" load using that pointer.
3612 It also generates code to compute the "realignment-token" (if the relevant
3613 target hook was defined), and creates a phi-node at the loop-header bb
3614 whose arguments are the result of the prolog-load (created by this
3615 function) and the result of a load that takes place in the loop (to be
3616 created by the caller to this function).
3618 For the case of dr_explicit_realign_optimized:
3619 The caller to this function uses the phi-result (msq) to create the
3620 realignment code inside the loop, and sets up the missing phi argument,
3621 as follows:
3622 loop:
3623 msq = phi (msq_init, lsq)
3624 lsq = *(floor(p')); # load in loop
3625 result = realign_load (msq, lsq, realignment_token);
3627 For the case of dr_explicit_realign:
3628 loop:
3629 msq = *(floor(p)); # load in loop
3630 p' = p + (VS-1);
3631 lsq = *(floor(p')); # load in loop
3632 result = realign_load (msq, lsq, realignment_token);
3634 Input:
3635 STMT - (scalar) load stmt to be vectorized. This load accesses
3636 a memory location that may be unaligned.
3637 GSI - place where new code is to be inserted.
3638 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3639 is used.
3641 Output:
3642 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3643 target hook, if defined.
3644 Return value - the result of the loop-header phi node. */
3646 tree
3647 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3648 tree *realignment_token,
3649 enum dr_alignment_support alignment_support_scheme,
3650 tree init_addr,
3651 struct loop **at_loop)
3653 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3654 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3655 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3656 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3657 struct loop *loop = NULL;
3658 edge pe = NULL;
3659 tree scalar_dest = gimple_assign_lhs (stmt);
3660 tree vec_dest;
3661 gimple inc;
3662 tree ptr;
3663 tree data_ref;
3664 gimple new_stmt;
3665 basic_block new_bb;
3666 tree msq_init = NULL_TREE;
3667 tree new_temp;
3668 gimple phi_stmt;
3669 tree msq = NULL_TREE;
3670 gimple_seq stmts = NULL;
3671 bool inv_p;
3672 bool compute_in_loop = false;
3673 bool nested_in_vect_loop = false;
3674 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3675 struct loop *loop_for_initial_load = NULL;
3677 if (loop_vinfo)
3679 loop = LOOP_VINFO_LOOP (loop_vinfo);
3680 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3683 gcc_assert (alignment_support_scheme == dr_explicit_realign
3684 || alignment_support_scheme == dr_explicit_realign_optimized);
3686 /* We need to generate three things:
3687 1. the misalignment computation
3688 2. the extra vector load (for the optimized realignment scheme).
3689 3. the phi node for the two vectors from which the realignment is
3690 done (for the optimized realignment scheme). */
3692 /* 1. Determine where to generate the misalignment computation.
3694 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3695 calculation will be generated by this function, outside the loop (in the
3696 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3697 caller, inside the loop.
3699 Background: If the misalignment remains fixed throughout the iterations of
3700 the loop, then both realignment schemes are applicable, and also the
3701 misalignment computation can be done outside LOOP. This is because we are
3702 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3703 are a multiple of VS (the Vector Size), and therefore the misalignment in
3704 different vectorized LOOP iterations is always the same.
3705 The problem arises only if the memory access is in an inner-loop nested
3706 inside LOOP, which is now being vectorized using outer-loop vectorization.
3707 This is the only case when the misalignment of the memory access may not
3708 remain fixed throughout the iterations of the inner-loop (as explained in
3709 detail in vect_supportable_dr_alignment). In this case, not only is the
3710 optimized realignment scheme not applicable, but also the misalignment
3711 computation (and generation of the realignment token that is passed to
3712 REALIGN_LOAD) have to be done inside the loop.
3714 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3715 or not, which in turn determines if the misalignment is computed inside
3716 the inner-loop, or outside LOOP. */
3718 if (init_addr != NULL_TREE || !loop_vinfo)
3720 compute_in_loop = true;
3721 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3725 /* 2. Determine where to generate the extra vector load.
3727 For the optimized realignment scheme, instead of generating two vector
3728 loads in each iteration, we generate a single extra vector load in the
3729 preheader of the loop, and in each iteration reuse the result of the
3730 vector load from the previous iteration. In case the memory access is in
3731 an inner-loop nested inside LOOP, which is now being vectorized using
3732 outer-loop vectorization, we need to determine whether this initial vector
3733 load should be generated at the preheader of the inner-loop, or can be
3734 generated at the preheader of LOOP. If the memory access has no evolution
3735 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3736 to be generated inside LOOP (in the preheader of the inner-loop). */
3738 if (nested_in_vect_loop)
3740 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3741 bool invariant_in_outerloop =
3742 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3743 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3745 else
3746 loop_for_initial_load = loop;
3747 if (at_loop)
3748 *at_loop = loop_for_initial_load;
3750 if (loop_for_initial_load)
3751 pe = loop_preheader_edge (loop_for_initial_load);
3753 /* 3. For the case of the optimized realignment, create the first vector
3754 load at the loop preheader. */
3756 if (alignment_support_scheme == dr_explicit_realign_optimized)
3758 /* Create msq_init = *(floor(p1)) in the loop preheader */
3760 gcc_assert (!compute_in_loop);
3761 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3762 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
3763 NULL_TREE, &init_addr, NULL, &inc,
3764 true, &inv_p);
3765 new_stmt = gimple_build_assign_with_ops
3766 (BIT_AND_EXPR, NULL_TREE, ptr,
3767 build_int_cst (TREE_TYPE (ptr),
3768 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3769 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3770 gimple_assign_set_lhs (new_stmt, new_temp);
3771 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3772 gcc_assert (!new_bb);
3773 data_ref
3774 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3775 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3776 new_stmt = gimple_build_assign (vec_dest, data_ref);
3777 new_temp = make_ssa_name (vec_dest, new_stmt);
3778 gimple_assign_set_lhs (new_stmt, new_temp);
3779 mark_symbols_for_renaming (new_stmt);
3780 if (pe)
3782 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3783 gcc_assert (!new_bb);
3785 else
3786 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3788 msq_init = gimple_assign_lhs (new_stmt);
3791 /* 4. Create realignment token using a target builtin, if available.
3792 It is done either inside the containing loop, or before LOOP (as
3793 determined above). */
3795 if (targetm.vectorize.builtin_mask_for_load)
3797 tree builtin_decl;
3799 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3800 if (!init_addr)
3802 /* Generate the INIT_ADDR computation outside LOOP. */
3803 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3804 NULL_TREE, loop);
3805 if (loop)
3807 pe = loop_preheader_edge (loop);
3808 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3809 gcc_assert (!new_bb);
3811 else
3812 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3815 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3816 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3817 vec_dest =
3818 vect_create_destination_var (scalar_dest,
3819 gimple_call_return_type (new_stmt));
3820 new_temp = make_ssa_name (vec_dest, new_stmt);
3821 gimple_call_set_lhs (new_stmt, new_temp);
3823 if (compute_in_loop)
3824 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3825 else
3827 /* Generate the misalignment computation outside LOOP. */
3828 pe = loop_preheader_edge (loop);
3829 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3830 gcc_assert (!new_bb);
3833 *realignment_token = gimple_call_lhs (new_stmt);
3835 /* The result of the CALL_EXPR to this builtin is determined from
3836 the value of the parameter and no global variables are touched
3837 which makes the builtin a "const" function. Requiring the
3838 builtin to have the "const" attribute makes it unnecessary
3839 to call mark_call_clobbered. */
3840 gcc_assert (TREE_READONLY (builtin_decl));
3843 if (alignment_support_scheme == dr_explicit_realign)
3844 return msq;
3846 gcc_assert (!compute_in_loop);
3847 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3850 /* 5. Create msq = phi <msq_init, lsq> in loop */
3852 pe = loop_preheader_edge (containing_loop);
3853 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3854 msq = make_ssa_name (vec_dest, NULL);
3855 phi_stmt = create_phi_node (msq, containing_loop->header);
3856 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3857 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3859 return msq;
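/* Illustrative sketch (not part of GCC, kept under #if 0): what the
   dr_explicit_realign scheme computes for one unaligned vector load.
   "Vectors" are modelled as 16-byte buffers and the realignment token as
   the byte offset of the unaligned address; the real token is a target
   permutation mask from builtin_mask_for_load.  All names below are
   hypothetical.  */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define VS 16   /* assumed vector size in bytes */

/* Emulate result = REALIGN_LOAD <msq, lsq, rt>: take VS consecutive bytes
   starting at offset RT from the concatenation of the two aligned loads.  */
static void
realign_load (const uint8_t *msq, const uint8_t *lsq, unsigned rt,
              uint8_t *result)
{
  for (unsigned i = 0; i < VS; i++)
    result[i] = (rt + i < VS) ? msq[rt + i] : lsq[rt + i - VS];
}

int
main (void)
{
  _Alignas (VS) uint8_t buf[4 * VS];
  for (unsigned i = 0; i < sizeof buf; i++)
    buf[i] = (uint8_t) i;

  uint8_t *p = buf + 5;                                /* unaligned address */
  uint8_t *floor_p = (uint8_t *) ((uintptr_t) p & -(uintptr_t) VS);
  unsigned rt = (unsigned) (p - floor_p);              /* realignment token */

  uint8_t msq[VS], lsq[VS], va[VS];
  memcpy (msq, floor_p, VS);                           /* msq = *(floor(p)) */
  memcpy (lsq, floor_p + VS, VS);                      /* next aligned load */
  realign_load (msq, lsq, rt, va);                     /* va == buf[5..20]  */

  for (unsigned i = 0; i < VS; i++)
    printf ("%d ", va[i]);
  printf ("\n");
  return 0;
}
#endif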
3863 /* Function vect_strided_load_supported.
3865 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3866 and FALSE otherwise. */
3868 bool
3869 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
3871 optab perm_even_optab, perm_odd_optab;
3872 enum machine_mode mode;
3874 mode = TYPE_MODE (vectype);
3876 /* vect_permute_load_chain requires the group size to be a power of two. */
3877 if (exact_log2 (count) == -1)
3879 if (vect_print_dump_info (REPORT_DETAILS))
3880 fprintf (vect_dump, "the size of the group of strided accesses"
3881 " is not a power of 2");
3882 return false;
3885 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3886 optab_default);
3887 if (!perm_even_optab)
3889 if (vect_print_dump_info (REPORT_DETAILS))
3890 fprintf (vect_dump, "no optab for perm_even.");
3891 return false;
3894 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3896 if (vect_print_dump_info (REPORT_DETAILS))
3897 fprintf (vect_dump, "perm_even op not supported by target.");
3898 return false;
3901 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3902 optab_default);
3903 if (!perm_odd_optab)
3905 if (vect_print_dump_info (REPORT_DETAILS))
3906 fprintf (vect_dump, "no optab for perm_odd.");
3907 return false;
3910 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3912 if (vect_print_dump_info (REPORT_DETAILS))
3913 fprintf (vect_dump, "perm_odd op not supported by target.");
3914 return false;
3916 return true;
3919 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
3920 type VECTYPE. */
3922 bool
3923 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3925 return vect_lanes_optab_supported_p ("vec_load_lanes",
3926 vec_load_lanes_optab,
3927 vectype, count);
3930 /* Function vect_permute_load_chain.
3932 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3933 a power of 2, generate extract_even/odd stmts to reorder the input data
3934 correctly. Return the final references for loads in RESULT_CHAIN.
3936 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3937 The input is 4 vectors each containing 8 elements. We assign a number to each
3938 element; the input sequence is:
3940 1st vec: 0 1 2 3 4 5 6 7
3941 2nd vec: 8 9 10 11 12 13 14 15
3942 3rd vec: 16 17 18 19 20 21 22 23
3943 4th vec: 24 25 26 27 28 29 30 31
3945 The output sequence should be:
3947 1st vec: 0 4 8 12 16 20 24 28
3948 2nd vec: 1 5 9 13 17 21 25 29
3949 3rd vec: 2 6 10 14 18 22 26 30
3950 4th vec: 3 7 11 15 19 23 27 31
3952 i.e., the first output vector should contain the first elements of each
3953 interleaving group, etc.
3955 We use extract_even/odd instructions to create such output. The input of
3956 each extract_even/odd operation is two vectors
3957 1st vec 2nd vec
3958 0 1 2 3 4 5 6 7
3960 and the output is the vector of extracted even/odd elements. The output of
3961 extract_even will be: 0 2 4 6
3962 and of extract_odd: 1 3 5 7
3965 The permutation is done in log LENGTH stages. In each stage extract_even
3966 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3967 their order. In our example,
3969 E1: extract_even (1st vec, 2nd vec)
3970 E2: extract_odd (1st vec, 2nd vec)
3971 E3: extract_even (3rd vec, 4th vec)
3972 E4: extract_odd (3rd vec, 4th vec)
3974 The output for the first stage will be:
3976 E1: 0 2 4 6 8 10 12 14
3977 E2: 1 3 5 7 9 11 13 15
3978 E3: 16 18 20 22 24 26 28 30
3979 E4: 17 19 21 23 25 27 29 31
3981 In order to proceed and create the correct sequence for the next stage (or
3982 for the correct output, if the second stage is the last one, as in our
3983 example), we first put the output of extract_even operation and then the
3984 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3985 The input for the second stage is:
3987 1st vec (E1): 0 2 4 6 8 10 12 14
3988 2nd vec (E3): 16 18 20 22 24 26 28 30
3989 3rd vec (E2): 1 3 5 7 9 11 13 15
3990 4th vec (E4): 17 19 21 23 25 27 29 31
3992 The output of the second stage:
3994 E1: 0 4 8 12 16 20 24 28
3995 E2: 2 6 10 14 18 22 26 30
3996 E3: 1 5 9 13 17 21 25 29
3997 E4: 3 7 11 15 19 23 27 31
3999 And RESULT_CHAIN after reordering:
4001 1st vec (E1): 0 4 8 12 16 20 24 28
4002 2nd vec (E3): 1 5 9 13 17 21 25 29
4003 3rd vec (E2): 2 6 10 14 18 22 26 30
4004 4th vec (E4): 3 7 11 15 19 23 27 31. */
4006 static void
4007 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4008 unsigned int length,
4009 gimple stmt,
4010 gimple_stmt_iterator *gsi,
4011 VEC(tree,heap) **result_chain)
4013 tree perm_dest, data_ref, first_vect, second_vect;
4014 gimple perm_stmt;
4015 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4016 int i;
4017 unsigned int j;
4019 gcc_assert (vect_strided_load_supported (vectype, length));
4021 *result_chain = VEC_copy (tree, heap, dr_chain);
4022 for (i = 0; i < exact_log2 (length); i++)
4024 for (j = 0; j < length; j +=2)
4026 first_vect = VEC_index (tree, dr_chain, j);
4027 second_vect = VEC_index (tree, dr_chain, j+1);
4029 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4030 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4031 DECL_GIMPLE_REG_P (perm_dest) = 1;
4032 add_referenced_var (perm_dest);
4034 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
4035 perm_dest, first_vect,
4036 second_vect);
4038 data_ref = make_ssa_name (perm_dest, perm_stmt);
4039 gimple_assign_set_lhs (perm_stmt, data_ref);
4040 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4041 mark_symbols_for_renaming (perm_stmt);
4043 VEC_replace (tree, *result_chain, j/2, data_ref);
4045 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4046 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4047 DECL_GIMPLE_REG_P (perm_dest) = 1;
4048 add_referenced_var (perm_dest);
4050 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
4051 perm_dest, first_vect,
4052 second_vect);
4053 data_ref = make_ssa_name (perm_dest, perm_stmt);
4054 gimple_assign_set_lhs (perm_stmt, data_ref);
4055 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4056 mark_symbols_for_renaming (perm_stmt);
4058 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4060 dr_chain = VEC_copy (tree, heap, *result_chain);
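/* Illustrative sketch (not part of GCC, kept under #if 0): the element
   movement performed by the log2(LENGTH) extract_even/odd stages documented
   above, modelled on plain integer arrays.  VF and LEN match the example in
   the comment; all names below are hypothetical.  */
#if 0
#include <stdio.h>
#include <string.h>

#define VF 8                   /* elements per vector  */
#define LEN 4                  /* vectors in the chain */

/* extract_even/extract_odd of two VF-element "vectors": the even (odd)
   elements of the first vector followed by those of the second.  */
static void
extract_even_odd (const int *v1, const int *v2, int *even, int *odd)
{
  for (int k = 0; k < VF; k++)
    {
      const int *src = (k < VF / 2) ? v1 : v2;
      int base = (k < VF / 2) ? 2 * k : 2 * (k - VF / 2);
      even[k] = src[base];
      odd[k]  = src[base + 1];
    }
}

int
main (void)
{
  int chain[LEN][VF], result[LEN][VF];

  for (int v = 0; v < LEN; v++)
    for (int k = 0; k < VF; k++)
      chain[v][k] = v * VF + k;                 /* elements 0..31 */

  /* log2(LEN) stages; even outputs fill the first half of the result
     chain and odd outputs the second half, as in the code above.  */
  for (int stage = 0; (1 << stage) < LEN; stage++)
    {
      for (int j = 0; j < LEN; j += 2)
        extract_even_odd (chain[j], chain[j + 1],
                          result[j / 2], result[j / 2 + LEN / 2]);
      memcpy (chain, result, sizeof chain);
    }

  /* Prints 0 4 8 12 16 20 24 28 / 1 5 9 13 ... as in the comment.  */
  for (int v = 0; v < LEN; v++)
    {
      for (int k = 0; k < VF; k++)
        printf (" %2d", chain[v][k]);
      printf ("\n");
    }
  return 0;
}
#endif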
4065 /* Function vect_transform_strided_load.
4067 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4068 to perform their permutation and assign the resulting vectorized statements to
4069 the scalar statements. */
4072 void
4073 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4074 gimple_stmt_iterator *gsi)
4076 VEC(tree,heap) *result_chain = NULL;
4078 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4079 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4080 vectors, that are ready for vector computation. */
4081 result_chain = VEC_alloc (tree, heap, size);
4082 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4083 vect_record_strided_load_vectors (stmt, result_chain);
4084 VEC_free (tree, heap, result_chain);
4087 /* RESULT_CHAIN contains the output of a group of strided loads that were
4088 generated as part of the vectorization of STMT. Assign the statement
4089 for each vector to the associated scalar statement. */
4091 void
4092 vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4094 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4095 gimple next_stmt, new_stmt;
4096 unsigned int i, gap_count;
4097 tree tmp_data_ref;
4099 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4100 Since we scan the chain starting from its first node, their order
4101 corresponds to the order of data-refs in RESULT_CHAIN. */
4102 next_stmt = first_stmt;
4103 gap_count = 1;
4104 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4106 if (!next_stmt)
4107 break;
4109 /* Skip the gaps. Loads created for the gaps will be removed by dead
4110 code elimination pass later. No need to check for the first stmt in
4111 the group, since it always exists.
4112 GROUP_GAP is the number of steps in elements from the previous
4113 access (if there is no gap GROUP_GAP is 1). We skip loads that
4114 correspond to the gaps. */
4115 if (next_stmt != first_stmt
4116 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4118 gap_count++;
4119 continue;
4122 while (next_stmt)
4124 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4125 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4126 copies, and we put the new vector statement in the first available
4127 RELATED_STMT. */
4128 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4129 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4130 else
4132 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4134 gimple prev_stmt =
4135 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4136 gimple rel_stmt =
4137 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4138 while (rel_stmt)
4140 prev_stmt = rel_stmt;
4141 rel_stmt =
4142 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4145 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4146 new_stmt;
4150 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4151 gap_count = 1;
4152 /* If NEXT_STMT accesses the same DR as the previous statement,
4153 put the same TMP_DATA_REF as its vectorized statement; otherwise
4154 get the next data-ref from RESULT_CHAIN. */
4155 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4156 break;
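/* Illustrative sketch (not part of GCC, kept under #if 0): the gap-skipping
   walk above, reduced to indices.  Group members are modelled as an array of
   GROUP_GAP values (steps from the previous access); vectors loaded for gap
   positions are simply skipped.  All names below are hypothetical.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* A group of stride 4 accessing a[4i], a[4i+1] and a[4i+3]: the third
     member is two steps after the second, so one loaded vector is a gap.  */
  unsigned group_gap[] = { 1, 1, 2 };
  unsigned n_members = 3;
  unsigned n_vectors = 4;          /* entries in RESULT_CHAIN */

  unsigned member = 0, gap_count = 1;
  for (unsigned vec = 0; vec < n_vectors && member < n_members; vec++)
    {
      /* Skip vectors produced for the gaps, as in the loop above.  */
      if (member != 0 && gap_count < group_gap[member])
        {
          gap_count++;
          continue;
        }
      printf ("vector %u -> group member %u\n", vec, member);
      member++;
      gap_count = 1;
    }
  return 0;                        /* prints 0->0, 1->1, 3->2 */
}
#endif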
4161 /* Function vect_can_force_dr_alignment_p.
4163 Returns whether the alignment of a DECL can be forced to be aligned
4164 on ALIGNMENT bit boundary. */
4166 bool
4167 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4169 if (TREE_CODE (decl) != VAR_DECL)
4170 return false;
4172 if (DECL_EXTERNAL (decl))
4173 return false;
4175 if (TREE_ASM_WRITTEN (decl))
4176 return false;
4178 if (TREE_STATIC (decl))
4179 return (alignment <= MAX_OFILE_ALIGNMENT);
4180 else
4181 return (alignment <= MAX_STACK_ALIGNMENT);
4185 /* Return whether the data reference DR is supported with respect to its
4186 alignment.
4187 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4188 if it is aligned, i.e., check if it is possible to vectorize it with different
4189 alignment. */
4191 enum dr_alignment_support
4192 vect_supportable_dr_alignment (struct data_reference *dr,
4193 bool check_aligned_accesses)
4195 gimple stmt = DR_STMT (dr);
4196 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4197 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4198 enum machine_mode mode = TYPE_MODE (vectype);
4199 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4200 struct loop *vect_loop = NULL;
4201 bool nested_in_vect_loop = false;
4203 if (aligned_access_p (dr) && !check_aligned_accesses)
4204 return dr_aligned;
4206 if (loop_vinfo)
4208 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4209 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4212 /* Possibly unaligned access. */
4214 /* We can choose between using the implicit realignment scheme (generating
4215 a misaligned_move stmt) and the explicit realignment scheme (generating
4216 aligned loads with a REALIGN_LOAD). There are two variants to the
4217 explicit realignment scheme: optimized, and unoptimized.
4218 We can optimize the realignment only if the step between consecutive
4219 vector loads is equal to the vector size. Since the vector memory
4220 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4221 is guaranteed that the misalignment amount remains the same throughout the
4222 execution of the vectorized loop. Therefore, we can create the
4223 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4224 at the loop preheader.
4226 However, in the case of outer-loop vectorization, when vectorizing a
4227 memory access in the inner-loop nested within the LOOP that is now being
4228 vectorized, while it is guaranteed that the misalignment of the
4229 vectorized memory access will remain the same in different outer-loop
4230 iterations, it is *not* guaranteed that it will remain the same throughout
4231 the execution of the inner-loop. This is because the inner-loop advances
4232 with the original scalar step (and not in steps of VS). If the inner-loop
4233 step happens to be a multiple of VS, then the misalignment remains fixed
4234 and we can use the optimized realignment scheme. For example:
4236 for (i=0; i<N; i++)
4237 for (j=0; j<M; j++)
4238 s += a[i+j];
4240 When vectorizing the i-loop in the above example, the step between
4241 consecutive vector loads is 1, and so the misalignment does not remain
4242 fixed across the execution of the inner-loop, and the realignment cannot
4243 be optimized (as illustrated in the following pseudo vectorized loop):
4245 for (i=0; i<N; i+=4)
4246 for (j=0; j<M; j++){
4247 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4248 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4249 // (assuming that we start from an aligned address).
4252 We therefore have to use the unoptimized realignment scheme:
4254 for (i=0; i<N; i+=4)
4255 for (j=k; j<M; j+=4)
4256 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4257 // that the misalignment of the initial address is
4258 // 0).
4260 The loop can then be vectorized as follows:
4262 for (k=0; k<4; k++){
4263 rt = get_realignment_token (&vp[k]);
4264 for (i=0; i<N; i+=4){
4265 v1 = vp[i+k];
4266 for (j=k; j<M; j+=4){
4267 v2 = vp[i+j+VS-1];
4268 va = REALIGN_LOAD <v1,v2,rt>;
4269 vs += va;
4270 v1 = v2;
4273 } */
4275 if (DR_IS_READ (dr))
4277 bool is_packed = false;
4278 tree type = (TREE_TYPE (DR_REF (dr)));
4280 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4281 && (!targetm.vectorize.builtin_mask_for_load
4282 || targetm.vectorize.builtin_mask_for_load ()))
4284 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4285 if ((nested_in_vect_loop
4286 && (TREE_INT_CST_LOW (DR_STEP (dr))
4287 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4288 || !loop_vinfo)
4289 return dr_explicit_realign;
4290 else
4291 return dr_explicit_realign_optimized;
4293 if (!known_alignment_for_access_p (dr))
4295 tree ba = DR_BASE_OBJECT (dr);
4297 if (ba)
4298 is_packed = contains_packed_reference (ba);
4301 if (targetm.vectorize.
4302 support_vector_misalignment (mode, type,
4303 DR_MISALIGNMENT (dr), is_packed))
4304 /* Can't software pipeline the loads, but can at least do them. */
4305 return dr_unaligned_supported;
4307 else
4309 bool is_packed = false;
4310 tree type = (TREE_TYPE (DR_REF (dr)));
4312 if (!known_alignment_for_access_p (dr))
4314 tree ba = DR_BASE_OBJECT (dr);
4316 if (ba)
4317 is_packed = contains_packed_reference (ba);
4320 if (targetm.vectorize.
4321 support_vector_misalignment (mode, type,
4322 DR_MISALIGNMENT (dr), is_packed))
4323 return dr_unaligned_supported;
4326 /* Unsupported. */
4327 return dr_unaligned_unsupported;
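/* Illustrative sketch (not part of GCC, kept under #if 0): why the
   misalignment of the inner-loop access in the example above is not fixed.
   With a vector size of four ints and I held fixed, the offset of &a[j]
   within a vector cycles through 0,1,2,3 when J advances by the scalar
   step, but stays constant when J advances by VS.  All names below are
   hypothetical.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  _Alignas (16) int a[64];
  const unsigned vs = 4;                       /* ints per vector */

  for (unsigned j = 0; j < 8; j++)             /* scalar step: cycles */
    printf ("j=%u misalign=%u\n", j,
            (unsigned) (((uintptr_t) &a[j] / sizeof (int)) % vs));

  for (unsigned j = 0; j < 8; j += vs)         /* step VS: stays 0 */
    printf ("j=%u misalign=%u\n", j,
            (unsigned) (((uintptr_t) &a[j] / sizeof (int)) % vs));
  return 0;
}
#endif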