2011-08-19 Vladimir Makarov <vmakarov@redhat.com>
[official-gcc.git] / gcc / tree-vect-data-refs.c
blobe4d32e9d277fe5a83144ffe8fe40551e507907c6
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
43 #include "expr.h"
44 #include "optabs.h"
46 /* Return true if load- or store-lanes optab OPTAB is implemented for
47 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
49 static bool
50 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
51 tree vectype, unsigned HOST_WIDE_INT count)
53 enum machine_mode mode, array_mode;
54 bool limit_p;
56 mode = TYPE_MODE (vectype);
57 limit_p = !targetm.array_mode_supported_p (mode, count);
58 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
59 MODE_INT, limit_p);
61 if (array_mode == BLKmode)
63 if (vect_print_dump_info (REPORT_DETAILS))
64 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65 GET_MODE_NAME (mode), count);
66 return false;
69 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
71 if (vect_print_dump_info (REPORT_DETAILS))
72 fprintf (vect_dump, "cannot use %s<%s><%s>",
73 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
74 return false;
77 if (vect_print_dump_info (REPORT_DETAILS))
78 fprintf (vect_dump, "can use %s<%s><%s>",
79 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
81 return true;
85 /* Return the smallest scalar part of STMT.
86 This is used to determine the vectype of the stmt. We generally set the
87 vectype according to the type of the result (lhs). For stmts whose
88 result-type is different than the type of the arguments (e.g., demotion,
89 promotion), vectype will be reset appropriately (later). Note that we have
90 to visit the smallest datatype in this function, because that determines the
91 VF. If the smallest datatype in the loop is present only as the rhs of a
92 promotion operation - we'd miss it.
93 Such a case, where a variable of this datatype does not appear in the lhs
94 anywhere in the loop, can only occur if it's an invariant: e.g.:
95 'int_x = (int) short_inv', which we'd expect to have been optimized away by
96 invariant motion. However, we cannot rely on invariant motion to always
97 take invariants out of the loop, and so in the case of promotion we also
98 have to check the rhs.
99 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
100 types. */
102 tree
103 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
104 HOST_WIDE_INT *rhs_size_unit)
106 tree scalar_type = gimple_expr_type (stmt);
107 HOST_WIDE_INT lhs, rhs;
109 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
111 if (is_gimple_assign (stmt)
112 && (gimple_assign_cast_p (stmt)
113 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
114 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
116 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
118 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
119 if (rhs < lhs)
120 scalar_type = rhs_type;
123 *lhs_size_unit = lhs;
124 *rhs_size_unit = rhs;
125 return scalar_type;
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
135 gimple next_stmt = first_stmt;
136 int result = 0;
138 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
139 return -1;
141 while (next_stmt && next_stmt != stmt)
143 result++;
144 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
147 if (next_stmt)
148 return result;
149 else
150 return -1;
154 /* Function vect_insert_into_interleaving_chain.
156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
158 static void
159 vect_insert_into_interleaving_chain (struct data_reference *dra,
160 struct data_reference *drb)
162 gimple prev, next;
163 tree next_init;
164 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
167 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
169 while (next)
171 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
174 /* Insert here. */
175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
177 return;
179 prev = next;
180 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
183 /* We got to the end of the list. Insert here. */
184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
189 /* Function vect_update_interleaving_chain.
191 For two data-refs DRA and DRB that are a part of a chain interleaved data
192 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
194 There are four possible cases:
195 1. New stmts - both DRA and DRB are not a part of any chain:
196 FIRST_DR = DRB
197 NEXT_DR (DRB) = DRA
198 2. DRB is a part of a chain and DRA is not:
199 no need to update FIRST_DR
200 no need to insert DRB
201 insert DRA according to init
202 3. DRA is a part of a chain and DRB is not:
203 if (init of FIRST_DR > init of DRB)
204 FIRST_DR = DRB
205 NEXT(FIRST_DR) = previous FIRST_DR
206 else
207 insert DRB according to its init
208 4. both DRA and DRB are in some interleaving chains:
209 choose the chain with the smallest init of FIRST_DR
210 insert the nodes of the second chain into the first one. */
212 static void
213 vect_update_interleaving_chain (struct data_reference *drb,
214 struct data_reference *dra)
216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218 tree next_init, init_dra_chain, init_drb_chain;
219 gimple first_a, first_b;
220 tree node_init;
221 gimple node, prev, next, first_stmt;
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
229 return;
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra, drb);
238 return;
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
246 old_first_stmt)));
247 gimple tmp;
249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp = old_first_stmt;
259 while (tmp)
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
265 else
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb, dra);
269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
271 return;
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277 if (first_a == first_b)
278 return;
279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
284 /* Insert the nodes of DRA chain into the DRB chain.
285 After inserting a node, continue from this node of the DRB chain (don't
286 start from the beginning. */
287 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289 first_stmt = first_b;
291 else
293 /* Insert the nodes of DRB chain into the DRA chain.
294 After inserting a node, continue from this node of the DRA chain (don't
295 start from the beginning. */
296 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298 first_stmt = first_a;
301 while (node)
303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
305 while (next)
307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 if (tree_int_cst_compare (next_init, node_init) > 0)
310 /* Insert here. */
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
313 prev = node;
314 break;
316 prev = next;
317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
319 if (!next)
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
324 prev = node;
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
331 /* Check dependence between DRA and DRB for basic block vectorization.
332 If the accesses share same bases and offsets, we can compare their initial
333 constant offsets to decide whether they differ or not. In case of a read-
334 write dependence we check that the load is before the store to ensure that
335 vectorization will not change the order of the accesses. */
337 static bool
338 vect_drs_dependent_in_basic_block (struct data_reference *dra,
339 struct data_reference *drb)
341 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
342 gimple earlier_stmt;
344 /* We only call this function for pairs of loads and stores, but we verify
345 it here. */
346 if (DR_IS_READ (dra) == DR_IS_READ (drb))
348 if (DR_IS_READ (dra))
349 return false;
350 else
351 return true;
354 /* Check that the data-refs have same bases and offsets. If not, we can't
355 determine if they are dependent. */
356 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357 || !dr_equal_offsets_p (dra, drb))
358 return true;
360 /* Check the types. */
361 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
364 if (type_size_a != type_size_b
365 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366 TREE_TYPE (DR_REF (drb))))
367 return true;
369 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
372 /* Two different locations - no dependence. */
373 if (init_a != init_b)
374 return false;
376 /* We have a read-write dependence. Check that the load is before the store.
377 When we vectorize basic blocks, vector load can be only before
378 corresponding scalar load, and vector store can be only after its
379 corresponding scalar store. So the order of the acceses is preserved in
380 case the load is before the store. */
381 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
383 return false;
385 return true;
389 /* Function vect_check_interleaving.
391 Check if DRA and DRB are a part of interleaving. In case they are, insert
392 DRA and DRB in an interleaving chain. */
394 static bool
395 vect_check_interleaving (struct data_reference *dra,
396 struct data_reference *drb)
398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
400 /* Check that the data-refs have same first location (except init) and they
401 are both either store or load (not load and store). */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403 || !dr_equal_offsets_p (dra, drb)
404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405 || DR_IS_READ (dra) != DR_IS_READ (drb))
406 return false;
408 /* Check:
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between
412 data-refs' inits. */
413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
416 if (type_size_a != type_size_b
417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419 TREE_TYPE (DR_REF (drb))))
420 return false;
422 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424 step = TREE_INT_CST_LOW (DR_STEP (dra));
426 if (init_a > init_b)
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size = (init_a - init_b) % type_size_a;
432 if (step && (init_a - init_b) > step)
433 return false;
435 if (diff_mod_size == 0)
437 vect_update_interleaving_chain (drb, dra);
438 if (vect_print_dump_info (REPORT_DR_DETAILS))
440 fprintf (vect_dump, "Detected interleaving ");
441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 fprintf (vect_dump, " and ");
443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
445 return true;
448 else
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size = (init_b - init_a) % type_size_a;
454 if (step && (init_b - init_a) > step)
455 return false;
457 if (diff_mod_size == 0)
459 vect_update_interleaving_chain (dra, drb);
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 fprintf (vect_dump, "Detected interleaving ");
463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
467 return true;
471 return false;
474 /* Check if data references pointed by DR_I and DR_J are same or
475 belong to same interleaving group. Return FALSE if drs are
476 different, otherwise return TRUE. */
478 static bool
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
481 gimple stmt_i = DR_STMT (dr_i);
482 gimple stmt_j = DR_STMT (dr_j);
484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
489 return true;
490 else
491 return false;
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
497 static bool
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
504 return true;
505 else
506 return false;
509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
510 tested at run-time. Return TRUE if DDR was successfully inserted.
511 Return false if versioning is not supported. */
513 static bool
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
519 return false;
521 if (vect_print_dump_info (REPORT_DR_DETAILS))
523 fprintf (vect_dump, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525 fprintf (vect_dump, " and ");
526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
529 if (optimize_loop_nest_for_size_p (loop))
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning not supported when optimizing for size.");
533 return false;
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
537 if (loop->inner)
539 if (vect_print_dump_info (REPORT_DR_DETAILS))
540 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
541 return false;
544 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
545 return true;
549 /* Function vect_analyze_data_ref_dependence.
551 Return TRUE if there (might) exist a dependence between a memory-reference
552 DRA and a memory-reference DRB. When versioning for alias may check a
553 dependence at run-time, return FALSE. Adjust *MAX_VF according to
554 the data dependence. */
556 static bool
557 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
558 loop_vec_info loop_vinfo, int *max_vf,
559 bool *data_dependence_in_bb)
561 unsigned int i;
562 struct loop *loop = NULL;
563 struct data_reference *dra = DDR_A (ddr);
564 struct data_reference *drb = DDR_B (ddr);
565 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
566 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
567 lambda_vector dist_v;
568 unsigned int loop_depth;
570 /* Don't bother to analyze statements marked as unvectorizable. */
571 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
572 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
573 return false;
575 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
577 /* Independent data accesses. */
578 vect_check_interleaving (dra, drb);
579 return false;
582 if (loop_vinfo)
583 loop = LOOP_VINFO_LOOP (loop_vinfo);
585 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
586 return false;
588 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
590 if (loop_vinfo)
592 if (vect_print_dump_info (REPORT_DR_DETAILS))
594 fprintf (vect_dump, "versioning for alias required: "
595 "can't determine dependence between ");
596 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
597 fprintf (vect_dump, " and ");
598 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
601 /* Add to list of ddrs that need to be tested at run-time. */
602 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
605 /* When vectorizing a basic block unknown depnedence can still mean
606 strided access. */
607 if (vect_check_interleaving (dra, drb))
608 return false;
610 if (vect_print_dump_info (REPORT_DR_DETAILS))
612 fprintf (vect_dump, "can't determine dependence between ");
613 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
614 fprintf (vect_dump, " and ");
615 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
618 /* We do not vectorize basic blocks with write-write dependencies. */
619 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
620 return true;
622 /* We deal with read-write dependencies in basic blocks later (by
623 verifying that all the loads in the basic block are before all the
624 stores). */
625 *data_dependence_in_bb = true;
626 return false;
629 /* Versioning for alias is not yet supported for basic block SLP, and
630 dependence distance is unapplicable, hence, in case of known data
631 dependence, basic block vectorization is impossible for now. */
632 if (!loop_vinfo)
634 if (dra != drb && vect_check_interleaving (dra, drb))
635 return false;
637 if (vect_print_dump_info (REPORT_DR_DETAILS))
639 fprintf (vect_dump, "determined dependence between ");
640 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
641 fprintf (vect_dump, " and ");
642 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
645 /* Do not vectorize basic blcoks with write-write dependences. */
646 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
647 return true;
649 /* Check if this dependence is allowed in basic block vectorization. */
650 return vect_drs_dependent_in_basic_block (dra, drb);
653 /* Loop-based vectorization and known data dependence. */
654 if (DDR_NUM_DIST_VECTS (ddr) == 0)
656 if (vect_print_dump_info (REPORT_DR_DETAILS))
658 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
659 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
660 fprintf (vect_dump, " and ");
661 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
663 /* Add to list of ddrs that need to be tested at run-time. */
664 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
667 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
668 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
670 int dist = dist_v[loop_depth];
672 if (vect_print_dump_info (REPORT_DR_DETAILS))
673 fprintf (vect_dump, "dependence distance = %d.", dist);
675 if (dist == 0)
677 if (vect_print_dump_info (REPORT_DR_DETAILS))
679 fprintf (vect_dump, "dependence distance == 0 between ");
680 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
681 fprintf (vect_dump, " and ");
682 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
685 /* For interleaving, mark that there is a read-write dependency if
686 necessary. We check before that one of the data-refs is store. */
687 if (DR_IS_READ (dra))
688 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
689 else
691 if (DR_IS_READ (drb))
692 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
695 continue;
698 if (dist > 0 && DDR_REVERSED_P (ddr))
700 /* If DDR_REVERSED_P the order of the data-refs in DDR was
701 reversed (to make distance vector positive), and the actual
702 distance is negative. */
703 if (vect_print_dump_info (REPORT_DR_DETAILS))
704 fprintf (vect_dump, "dependence distance negative.");
705 continue;
708 if (abs (dist) >= 2
709 && abs (dist) < *max_vf)
711 /* The dependence distance requires reduction of the maximal
712 vectorization factor. */
713 *max_vf = abs (dist);
714 if (vect_print_dump_info (REPORT_DR_DETAILS))
715 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
716 *max_vf);
719 if (abs (dist) >= *max_vf)
721 /* Dependence distance does not create dependence, as far as
722 vectorization is concerned, in this case. */
723 if (vect_print_dump_info (REPORT_DR_DETAILS))
724 fprintf (vect_dump, "dependence distance >= VF.");
725 continue;
728 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
730 fprintf (vect_dump, "not vectorized, possible dependence "
731 "between data-refs ");
732 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
733 fprintf (vect_dump, " and ");
734 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
737 return true;
740 return false;
743 /* Function vect_analyze_data_ref_dependences.
745 Examine all the data references in the loop, and make sure there do not
746 exist any data dependences between them. Set *MAX_VF according to
747 the maximum vectorization factor the data dependences allow. */
749 bool
750 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
751 bb_vec_info bb_vinfo, int *max_vf,
752 bool *data_dependence_in_bb)
754 unsigned int i;
755 VEC (ddr_p, heap) *ddrs = NULL;
756 struct data_dependence_relation *ddr;
758 if (vect_print_dump_info (REPORT_DETAILS))
759 fprintf (vect_dump, "=== vect_analyze_dependences ===");
761 if (loop_vinfo)
762 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
763 else
764 ddrs = BB_VINFO_DDRS (bb_vinfo);
766 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
767 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
768 data_dependence_in_bb))
769 return false;
771 return true;
775 /* Function vect_compute_data_ref_alignment
777 Compute the misalignment of the data reference DR.
779 Output:
780 1. If during the misalignment computation it is found that the data reference
781 cannot be vectorized then false is returned.
782 2. DR_MISALIGNMENT (DR) is defined.
784 FOR NOW: No analysis is actually performed. Misalignment is calculated
785 only for trivial cases. TODO. */
787 static bool
788 vect_compute_data_ref_alignment (struct data_reference *dr)
790 gimple stmt = DR_STMT (dr);
791 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
792 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
793 struct loop *loop = NULL;
794 tree ref = DR_REF (dr);
795 tree vectype;
796 tree base, base_addr;
797 bool base_aligned;
798 tree misalign;
799 tree aligned_to, alignment;
801 if (vect_print_dump_info (REPORT_DETAILS))
802 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
804 if (loop_vinfo)
805 loop = LOOP_VINFO_LOOP (loop_vinfo);
807 /* Initialize misalignment to unknown. */
808 SET_DR_MISALIGNMENT (dr, -1);
810 misalign = DR_INIT (dr);
811 aligned_to = DR_ALIGNED_TO (dr);
812 base_addr = DR_BASE_ADDRESS (dr);
813 vectype = STMT_VINFO_VECTYPE (stmt_info);
815 /* In case the dataref is in an inner-loop of the loop that is being
816 vectorized (LOOP), we use the base and misalignment information
817 relative to the outer-loop (LOOP). This is ok only if the misalignment
818 stays the same throughout the execution of the inner-loop, which is why
819 we have to check that the stride of the dataref in the inner-loop evenly
820 divides by the vector size. */
821 if (loop && nested_in_vect_loop_p (loop, stmt))
823 tree step = DR_STEP (dr);
824 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
826 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
828 if (vect_print_dump_info (REPORT_ALIGNMENT))
829 fprintf (vect_dump, "inner step divides the vector-size.");
830 misalign = STMT_VINFO_DR_INIT (stmt_info);
831 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
832 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
834 else
836 if (vect_print_dump_info (REPORT_ALIGNMENT))
837 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
838 misalign = NULL_TREE;
842 base = build_fold_indirect_ref (base_addr);
843 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
845 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
846 || !misalign)
848 if (vect_print_dump_info (REPORT_ALIGNMENT))
850 fprintf (vect_dump, "Unknown alignment for access: ");
851 print_generic_expr (vect_dump, base, TDF_SLIM);
853 return true;
856 if ((DECL_P (base)
857 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
858 alignment) >= 0)
859 || (TREE_CODE (base_addr) == SSA_NAME
860 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
861 TREE_TYPE (base_addr)))),
862 alignment) >= 0)
863 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
864 base_aligned = true;
865 else
866 base_aligned = false;
868 if (!base_aligned)
870 /* Do not change the alignment of global variables if
871 flag_section_anchors is enabled. */
872 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
873 || (TREE_STATIC (base) && flag_section_anchors))
875 if (vect_print_dump_info (REPORT_DETAILS))
877 fprintf (vect_dump, "can't force alignment of ref: ");
878 print_generic_expr (vect_dump, ref, TDF_SLIM);
880 return true;
883 /* Force the alignment of the decl.
884 NOTE: This is the only change to the code we make during
885 the analysis phase, before deciding to vectorize the loop. */
886 if (vect_print_dump_info (REPORT_DETAILS))
888 fprintf (vect_dump, "force alignment of ");
889 print_generic_expr (vect_dump, ref, TDF_SLIM);
892 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
893 DECL_USER_ALIGN (base) = 1;
896 /* At this point we assume that the base is aligned. */
897 gcc_assert (base_aligned
898 || (TREE_CODE (base) == VAR_DECL
899 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
901 /* If this is a backward running DR then first access in the larger
902 vectype actually is N-1 elements before the address in the DR.
903 Adjust misalign accordingly. */
904 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
906 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
907 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
908 otherwise we wouldn't be here. */
909 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
910 /* PLUS because DR_STEP was negative. */
911 misalign = size_binop (PLUS_EXPR, misalign, offset);
914 /* Modulo alignment. */
915 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
917 if (!host_integerp (misalign, 1))
919 /* Negative or overflowed misalignment value. */
920 if (vect_print_dump_info (REPORT_DETAILS))
921 fprintf (vect_dump, "unexpected misalign value");
922 return false;
925 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
927 if (vect_print_dump_info (REPORT_DETAILS))
929 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
930 print_generic_expr (vect_dump, ref, TDF_SLIM);
933 return true;
937 /* Function vect_compute_data_refs_alignment
939 Compute the misalignment of data references in the loop.
940 Return FALSE if a data reference is found that cannot be vectorized. */
942 static bool
943 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
944 bb_vec_info bb_vinfo)
946 VEC (data_reference_p, heap) *datarefs;
947 struct data_reference *dr;
948 unsigned int i;
950 if (loop_vinfo)
951 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
952 else
953 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
955 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
956 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
957 && !vect_compute_data_ref_alignment (dr))
959 if (bb_vinfo)
961 /* Mark unsupported statement as unvectorizable. */
962 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
963 continue;
965 else
966 return false;
969 return true;
973 /* Function vect_update_misalignment_for_peel
975 DR - the data reference whose misalignment is to be adjusted.
976 DR_PEEL - the data reference whose misalignment is being made
977 zero in the vector loop by the peel.
978 NPEEL - the number of iterations in the peel loop if the misalignment
979 of DR_PEEL is known at compile time. */
981 static void
982 vect_update_misalignment_for_peel (struct data_reference *dr,
983 struct data_reference *dr_peel, int npeel)
985 unsigned int i;
986 VEC(dr_p,heap) *same_align_drs;
987 struct data_reference *current_dr;
988 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
989 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
990 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
991 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
993 /* For interleaved data accesses the step in the loop must be multiplied by
994 the size of the interleaving group. */
995 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
996 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
997 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
998 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1000 /* It can be assumed that the data refs with the same alignment as dr_peel
1001 are aligned in the vector loop. */
1002 same_align_drs
1003 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1004 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1006 if (current_dr != dr)
1007 continue;
1008 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1009 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1010 SET_DR_MISALIGNMENT (dr, 0);
1011 return;
1014 if (known_alignment_for_access_p (dr)
1015 && known_alignment_for_access_p (dr_peel))
1017 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1018 int misal = DR_MISALIGNMENT (dr);
1019 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1020 misal += negative ? -npeel * dr_size : npeel * dr_size;
1021 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1022 SET_DR_MISALIGNMENT (dr, misal);
1023 return;
1026 if (vect_print_dump_info (REPORT_DETAILS))
1027 fprintf (vect_dump, "Setting misalignment to -1.");
1028 SET_DR_MISALIGNMENT (dr, -1);
1032 /* Function vect_verify_datarefs_alignment
1034 Return TRUE if all data references in the loop can be
1035 handled with respect to alignment. */
1037 bool
1038 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1040 VEC (data_reference_p, heap) *datarefs;
1041 struct data_reference *dr;
1042 enum dr_alignment_support supportable_dr_alignment;
1043 unsigned int i;
1045 if (loop_vinfo)
1046 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1047 else
1048 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1050 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1052 gimple stmt = DR_STMT (dr);
1053 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1055 /* For interleaving, only the alignment of the first access matters.
1056 Skip statements marked as not vectorizable. */
1057 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1058 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1059 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1060 continue;
1062 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1063 if (!supportable_dr_alignment)
1065 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1067 if (DR_IS_READ (dr))
1068 fprintf (vect_dump,
1069 "not vectorized: unsupported unaligned load.");
1070 else
1071 fprintf (vect_dump,
1072 "not vectorized: unsupported unaligned store.");
1074 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1076 return false;
1078 if (supportable_dr_alignment != dr_aligned
1079 && vect_print_dump_info (REPORT_ALIGNMENT))
1080 fprintf (vect_dump, "Vectorizing an unaligned access.");
1082 return true;
1086 /* Function vector_alignment_reachable_p
1088 Return true if vector alignment for DR is reachable by peeling
1089 a few loop iterations. Return false otherwise. */
1091 static bool
1092 vector_alignment_reachable_p (struct data_reference *dr)
1094 gimple stmt = DR_STMT (dr);
1095 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1096 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1098 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1100 /* For interleaved access we peel only if number of iterations in
1101 the prolog loop ({VF - misalignment}), is a multiple of the
1102 number of the interleaved accesses. */
1103 int elem_size, mis_in_elements;
1104 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1106 /* FORNOW: handle only known alignment. */
1107 if (!known_alignment_for_access_p (dr))
1108 return false;
1110 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1111 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1113 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1114 return false;
1117 /* If misalignment is known at the compile time then allow peeling
1118 only if natural alignment is reachable through peeling. */
1119 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1121 HOST_WIDE_INT elmsize =
1122 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1123 if (vect_print_dump_info (REPORT_DETAILS))
1125 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1126 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1128 if (DR_MISALIGNMENT (dr) % elmsize)
1130 if (vect_print_dump_info (REPORT_DETAILS))
1131 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1132 return false;
1136 if (!known_alignment_for_access_p (dr))
1138 tree type = (TREE_TYPE (DR_REF (dr)));
1139 tree ba = DR_BASE_OBJECT (dr);
1140 bool is_packed = false;
1142 if (ba)
1143 is_packed = contains_packed_reference (ba);
1145 if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1146 is_packed = true;
1148 if (vect_print_dump_info (REPORT_DETAILS))
1149 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1150 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1151 return true;
1152 else
1153 return false;
1156 return true;
1160 /* Calculate the cost of the memory access represented by DR. */
1162 static void
1163 vect_get_data_access_cost (struct data_reference *dr,
1164 unsigned int *inside_cost,
1165 unsigned int *outside_cost)
1167 gimple stmt = DR_STMT (dr);
1168 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1169 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1170 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1171 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1172 int ncopies = vf / nunits;
1173 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1175 if (!supportable_dr_alignment)
1176 *inside_cost = VECT_MAX_COST;
1177 else
1179 if (DR_IS_READ (dr))
1180 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1181 else
1182 vect_get_store_cost (dr, ncopies, inside_cost);
1185 if (vect_print_dump_info (REPORT_COST))
1186 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1187 "outside_cost = %d.", *inside_cost, *outside_cost);
1191 static hashval_t
1192 vect_peeling_hash (const void *elem)
1194 const struct _vect_peel_info *peel_info;
1196 peel_info = (const struct _vect_peel_info *) elem;
1197 return (hashval_t) peel_info->npeel;
1201 static int
1202 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1204 const struct _vect_peel_info *a, *b;
1206 a = (const struct _vect_peel_info *) elem1;
1207 b = (const struct _vect_peel_info *) elem2;
1208 return (a->npeel == b->npeel);
1212 /* Insert DR into peeling hash table with NPEEL as key. */
1214 static void
1215 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1216 int npeel)
1218 struct _vect_peel_info elem, *slot;
1219 void **new_slot;
1220 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1222 elem.npeel = npeel;
1223 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1224 &elem);
1225 if (slot)
1226 slot->count++;
1227 else
1229 slot = XNEW (struct _vect_peel_info);
1230 slot->npeel = npeel;
1231 slot->dr = dr;
1232 slot->count = 1;
1233 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1234 INSERT);
1235 *new_slot = slot;
1238 if (!supportable_dr_alignment && !flag_vect_cost_model)
1239 slot->count += VECT_MAX_COST;
1243 /* Traverse peeling hash table to find peeling option that aligns maximum
1244 number of data accesses. */
1246 static int
1247 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1249 vect_peel_info elem = (vect_peel_info) *slot;
1250 vect_peel_extended_info max = (vect_peel_extended_info) data;
1252 if (elem->count > max->peel_info.count
1253 || (elem->count == max->peel_info.count
1254 && max->peel_info.npeel > elem->npeel))
1256 max->peel_info.npeel = elem->npeel;
1257 max->peel_info.count = elem->count;
1258 max->peel_info.dr = elem->dr;
1261 return 1;
1265 /* Traverse peeling hash table and calculate cost for each peeling option.
1266 Find the one with the lowest cost. */
1268 static int
1269 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1271 vect_peel_info elem = (vect_peel_info) *slot;
1272 vect_peel_extended_info min = (vect_peel_extended_info) data;
1273 int save_misalignment, dummy;
1274 unsigned int inside_cost = 0, outside_cost = 0, i;
1275 gimple stmt = DR_STMT (elem->dr);
1276 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1277 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1278 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1279 struct data_reference *dr;
1281 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1283 stmt = DR_STMT (dr);
1284 stmt_info = vinfo_for_stmt (stmt);
1285 /* For interleaving, only the alignment of the first access
1286 matters. */
1287 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1288 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1289 continue;
1291 save_misalignment = DR_MISALIGNMENT (dr);
1292 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1293 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1294 SET_DR_MISALIGNMENT (dr, save_misalignment);
1297 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1298 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1300 if (inside_cost < min->inside_cost
1301 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1303 min->inside_cost = inside_cost;
1304 min->outside_cost = outside_cost;
1305 min->peel_info.dr = elem->dr;
1306 min->peel_info.npeel = elem->npeel;
1309 return 1;
1313 /* Choose best peeling option by traversing peeling hash table and either
1314 choosing an option with the lowest cost (if cost model is enabled) or the
1315 option that aligns as many accesses as possible. */
1317 static struct data_reference *
1318 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1319 unsigned int *npeel)
1321 struct _vect_peel_extended_info res;
1323 res.peel_info.dr = NULL;
1325 if (flag_vect_cost_model)
1327 res.inside_cost = INT_MAX;
1328 res.outside_cost = INT_MAX;
1329 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1330 vect_peeling_hash_get_lowest_cost, &res);
1332 else
1334 res.peel_info.count = 0;
1335 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1336 vect_peeling_hash_get_most_frequent, &res);
1339 *npeel = res.peel_info.npeel;
1340 return res.peel_info.dr;
1344 /* Function vect_enhance_data_refs_alignment
1346 This pass will use loop versioning and loop peeling in order to enhance
1347 the alignment of data references in the loop.
1349 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1350 original loop is to be vectorized. Any other loops that are created by
1351 the transformations performed in this pass - are not supposed to be
1352 vectorized. This restriction will be relaxed.
1354 This pass will require a cost model to guide it whether to apply peeling
1355 or versioning or a combination of the two. For example, the scheme that
1356 intel uses when given a loop with several memory accesses, is as follows:
1357 choose one memory access ('p') which alignment you want to force by doing
1358 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1359 other accesses are not necessarily aligned, or (2) use loop versioning to
1360 generate one loop in which all accesses are aligned, and another loop in
1361 which only 'p' is necessarily aligned.
1363 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1364 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1365 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1367 Devising a cost model is the most critical aspect of this work. It will
1368 guide us on which access to peel for, whether to use loop versioning, how
1369 many versions to create, etc. The cost model will probably consist of
1370 generic considerations as well as target specific considerations (on
1371 powerpc for example, misaligned stores are more painful than misaligned
1372 loads).
1374 Here are the general steps involved in alignment enhancements:
1376 -- original loop, before alignment analysis:
1377 for (i=0; i<N; i++){
1378 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1379 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1382 -- After vect_compute_data_refs_alignment:
1383 for (i=0; i<N; i++){
1384 x = q[i]; # DR_MISALIGNMENT(q) = 3
1385 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1388 -- Possibility 1: we do loop versioning:
1389 if (p is aligned) {
1390 for (i=0; i<N; i++){ # loop 1A
1391 x = q[i]; # DR_MISALIGNMENT(q) = 3
1392 p[i] = y; # DR_MISALIGNMENT(p) = 0
1395 else {
1396 for (i=0; i<N; i++){ # loop 1B
1397 x = q[i]; # DR_MISALIGNMENT(q) = 3
1398 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1402 -- Possibility 2: we do loop peeling:
1403 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1404 x = q[i];
1405 p[i] = y;
1407 for (i = 3; i < N; i++){ # loop 2A
1408 x = q[i]; # DR_MISALIGNMENT(q) = 0
1409 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1412 -- Possibility 3: combination of loop peeling and versioning:
1413 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1414 x = q[i];
1415 p[i] = y;
1417 if (p is aligned) {
1418 for (i = 3; i<N; i++){ # loop 3A
1419 x = q[i]; # DR_MISALIGNMENT(q) = 0
1420 p[i] = y; # DR_MISALIGNMENT(p) = 0
1423 else {
1424 for (i = 3; i<N; i++){ # loop 3B
1425 x = q[i]; # DR_MISALIGNMENT(q) = 0
1426 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1430 These loops are later passed to loop_transform to be vectorized. The
1431 vectorizer will use the alignment information to guide the transformation
1432 (whether to generate regular loads/stores, or with special handling for
1433 misalignment). */
1435 bool
1436 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1438 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1439 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1440 enum dr_alignment_support supportable_dr_alignment;
1441 struct data_reference *dr0 = NULL, *first_store = NULL;
1442 struct data_reference *dr;
1443 unsigned int i, j;
1444 bool do_peeling = false;
1445 bool do_versioning = false;
1446 bool stat;
1447 gimple stmt;
1448 stmt_vec_info stmt_info;
1449 int vect_versioning_for_alias_required;
1450 unsigned int npeel = 0;
1451 bool all_misalignments_unknown = true;
1452 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1453 unsigned possible_npeel_number = 1;
1454 tree vectype;
1455 unsigned int nelements, mis, same_align_drs_max = 0;
1457 if (vect_print_dump_info (REPORT_DETAILS))
1458 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1460 /* While cost model enhancements are expected in the future, the high level
1461 view of the code at this time is as follows:
1463 A) If there is a misaligned access then see if peeling to align
1464 this access can make all data references satisfy
1465 vect_supportable_dr_alignment. If so, update data structures
1466 as needed and return true.
1468 B) If peeling wasn't possible and there is a data reference with an
1469 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1470 then see if loop versioning checks can be used to make all data
1471 references satisfy vect_supportable_dr_alignment. If so, update
1472 data structures as needed and return true.
1474 C) If neither peeling nor versioning were successful then return false if
1475 any data reference does not satisfy vect_supportable_dr_alignment.
1477 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1479 Note, Possibility 3 above (which is peeling and versioning together) is not
1480 being done at this time. */
1482 /* (1) Peeling to force alignment. */
1484 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1485 Considerations:
1486 + How many accesses will become aligned due to the peeling
1487 - How many accesses will become unaligned due to the peeling,
1488 and the cost of misaligned accesses.
1489 - The cost of peeling (the extra runtime checks, the increase
1490 in code size). */
1492 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1494 stmt = DR_STMT (dr);
1495 stmt_info = vinfo_for_stmt (stmt);
1497 if (!STMT_VINFO_RELEVANT (stmt_info))
1498 continue;
1500 /* For interleaving, only the alignment of the first access
1501 matters. */
1502 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1503 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1504 continue;
1506 /* For invariant accesses there is nothing to enhance. */
1507 if (integer_zerop (DR_STEP (dr)))
1508 continue;
1510 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1511 do_peeling = vector_alignment_reachable_p (dr);
1512 if (do_peeling)
1514 if (known_alignment_for_access_p (dr))
1516 unsigned int npeel_tmp;
1517 bool negative = tree_int_cst_compare (DR_STEP (dr),
1518 size_zero_node) < 0;
1520 /* Save info about DR in the hash table. */
1521 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1522 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1523 htab_create (1, vect_peeling_hash,
1524 vect_peeling_hash_eq, free);
1526 vectype = STMT_VINFO_VECTYPE (stmt_info);
1527 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1528 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1529 TREE_TYPE (DR_REF (dr))));
1530 npeel_tmp = (negative
1531 ? (mis - nelements) : (nelements - mis))
1532 & (nelements - 1);
1534 /* For multiple types, it is possible that the bigger type access
1535 will have more than one peeling option. E.g., a loop with two
1536 types: one of size (vector size / 4), and the other one of
1537 size (vector size / 8). Vectorization factor will 8. If both
1538 access are misaligned by 3, the first one needs one scalar
1539 iteration to be aligned, and the second one needs 5. But the
1540 the first one will be aligned also by peeling 5 scalar
1541 iterations, and in that case both accesses will be aligned.
1542 Hence, except for the immediate peeling amount, we also want
1543 to try to add full vector size, while we don't exceed
1544 vectorization factor.
1545 We do this automtically for cost model, since we calculate cost
1546 for every peeling option. */
1547 if (!flag_vect_cost_model)
1548 possible_npeel_number = vf /nelements;
1550 /* Handle the aligned case. We may decide to align some other
1551 access, making DR unaligned. */
1552 if (DR_MISALIGNMENT (dr) == 0)
1554 npeel_tmp = 0;
1555 if (!flag_vect_cost_model)
1556 possible_npeel_number++;
1559 for (j = 0; j < possible_npeel_number; j++)
1561 gcc_assert (npeel_tmp <= vf);
1562 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1563 npeel_tmp += nelements;
1566 all_misalignments_unknown = false;
1567 /* Data-ref that was chosen for the case that all the
1568 misalignments are unknown is not relevant anymore, since we
1569 have a data-ref with known alignment. */
1570 dr0 = NULL;
1572 else
1574 /* If we don't know all the misalignment values, we prefer
1575 peeling for data-ref that has maximum number of data-refs
1576 with the same alignment, unless the target prefers to align
1577 stores over load. */
1578 if (all_misalignments_unknown)
1580 if (same_align_drs_max < VEC_length (dr_p,
1581 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1582 || !dr0)
1584 same_align_drs_max = VEC_length (dr_p,
1585 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1586 dr0 = dr;
1589 if (!first_store && DR_IS_WRITE (dr))
1590 first_store = dr;
1593 /* If there are both known and unknown misaligned accesses in the
1594 loop, we choose peeling amount according to the known
1595 accesses. */
1598 if (!supportable_dr_alignment)
1600 dr0 = dr;
1601 if (!first_store && DR_IS_WRITE (dr))
1602 first_store = dr;
1606 else
1608 if (!aligned_access_p (dr))
1610 if (vect_print_dump_info (REPORT_DETAILS))
1611 fprintf (vect_dump, "vector alignment may not be reachable");
1613 break;
1618 vect_versioning_for_alias_required
1619 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1621 /* Temporarily, if versioning for alias is required, we disable peeling
1622 until we support peeling and versioning. Often peeling for alignment
1623 will require peeling for loop-bound, which in turn requires that we
1624 know how to adjust the loop ivs after the loop. */
1625 if (vect_versioning_for_alias_required
1626 || !vect_can_advance_ivs_p (loop_vinfo)
1627 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1628 do_peeling = false;
1630 if (do_peeling && all_misalignments_unknown
1631 && vect_supportable_dr_alignment (dr0, false))
1634 /* Check if the target requires to prefer stores over loads, i.e., if
1635 misaligned stores are more expensive than misaligned loads (taking
1636 drs with same alignment into account). */
1637 if (first_store && DR_IS_READ (dr0))
1639 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1640 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1641 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1642 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1644 vect_get_data_access_cost (dr0, &load_inside_cost,
1645 &load_outside_cost);
1646 vect_get_data_access_cost (first_store, &store_inside_cost,
1647 &store_outside_cost);
1649 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1650 aligning the load DR0). */
1651 load_inside_penalty = store_inside_cost;
1652 load_outside_penalty = store_outside_cost;
1653 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1654 (vinfo_for_stmt (DR_STMT (first_store))),
1655 i, dr);
1656 i++)
1657 if (DR_IS_READ (dr))
1659 load_inside_penalty += load_inside_cost;
1660 load_outside_penalty += load_outside_cost;
1662 else
1664 load_inside_penalty += store_inside_cost;
1665 load_outside_penalty += store_outside_cost;
1668 /* Calculate the penalty for leaving DR0 unaligned (by
1669 aligning the FIRST_STORE). */
1670 store_inside_penalty = load_inside_cost;
1671 store_outside_penalty = load_outside_cost;
1672 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1673 (vinfo_for_stmt (DR_STMT (dr0))),
1674 i, dr);
1675 i++)
1676 if (DR_IS_READ (dr))
1678 store_inside_penalty += load_inside_cost;
1679 store_outside_penalty += load_outside_cost;
1681 else
1683 store_inside_penalty += store_inside_cost;
1684 store_outside_penalty += store_outside_cost;
1687 if (load_inside_penalty > store_inside_penalty
1688 || (load_inside_penalty == store_inside_penalty
1689 && load_outside_penalty > store_outside_penalty))
1690 dr0 = first_store;
1693 /* In case there are only loads with different unknown misalignments, use
1694 peeling only if it may help to align other accesses in the loop. */
1695 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1696 (vinfo_for_stmt (DR_STMT (dr0))))
1697 && vect_supportable_dr_alignment (dr0, false)
1698 != dr_unaligned_supported)
1699 do_peeling = false;
1702 if (do_peeling && !dr0)
1704 /* Peeling is possible, but there is no data access that is not supported
1705 unless aligned. So we try to choose the best possible peeling. */
1707 /* We should get here only if there are drs with known misalignment. */
1708 gcc_assert (!all_misalignments_unknown);
1710 /* Choose the best peeling from the hash table. */
1711 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1712 if (!dr0 || !npeel)
1713 do_peeling = false;
1716 if (do_peeling)
1718 stmt = DR_STMT (dr0);
1719 stmt_info = vinfo_for_stmt (stmt);
1720 vectype = STMT_VINFO_VECTYPE (stmt_info);
1721 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1723 if (known_alignment_for_access_p (dr0))
1725 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1726 size_zero_node) < 0;
1727 if (!npeel)
1729 /* Since it's known at compile time, compute the number of
1730 iterations in the peeled loop (the peeling factor) for use in
1731 updating DR_MISALIGNMENT values. The peeling factor is the
1732 vectorization factor minus the misalignment as an element
1733 count. */
1734 mis = DR_MISALIGNMENT (dr0);
1735 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1736 npeel = ((negative ? mis - nelements : nelements - mis)
1737 & (nelements - 1));
1740 /* For interleaved data access every iteration accesses all the
1741 members of the group, therefore we divide the number of iterations
1742 by the group size. */
1743 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1744 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1745 npeel /= GROUP_SIZE (stmt_info);
1747 if (vect_print_dump_info (REPORT_DETAILS))
1748 fprintf (vect_dump, "Try peeling by %d", npeel);
1751 /* Ensure that all data refs can be vectorized after the peel. */
1752 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1754 int save_misalignment;
1756 if (dr == dr0)
1757 continue;
1759 stmt = DR_STMT (dr);
1760 stmt_info = vinfo_for_stmt (stmt);
1761 /* For interleaving, only the alignment of the first access
1762 matters. */
1763 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1764 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1765 continue;
1767 save_misalignment = DR_MISALIGNMENT (dr);
1768 vect_update_misalignment_for_peel (dr, dr0, npeel);
1769 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1770 SET_DR_MISALIGNMENT (dr, save_misalignment);
1772 if (!supportable_dr_alignment)
1774 do_peeling = false;
1775 break;
1779 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1781 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1782 if (!stat)
1783 do_peeling = false;
1784 else
1785 return stat;
1788 if (do_peeling)
1790 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1791 If the misalignment of DR_i is identical to that of dr0 then set
1792 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1793 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1794 by the peeling factor times the element size of DR_i (MOD the
1795 vectorization factor times the size). Otherwise, the
1796 misalignment of DR_i must be set to unknown. */
1797 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1798 if (dr != dr0)
1799 vect_update_misalignment_for_peel (dr, dr0, npeel);
1801 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1802 if (npeel)
1803 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1804 else
1805 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1806 SET_DR_MISALIGNMENT (dr0, 0);
1807 if (vect_print_dump_info (REPORT_ALIGNMENT))
1808 fprintf (vect_dump, "Alignment of access forced using peeling.");
1810 if (vect_print_dump_info (REPORT_DETAILS))
1811 fprintf (vect_dump, "Peeling for alignment will be applied.");
1813 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1814 gcc_assert (stat);
1815 return stat;
1820 /* (2) Versioning to force alignment. */
1822 /* Try versioning if:
1823 1) flag_tree_vect_loop_version is TRUE
1824 2) optimize loop for speed
1825 3) there is at least one unsupported misaligned data ref with an unknown
1826 misalignment, and
1827 4) all misaligned data refs with a known misalignment are supported, and
1828 5) the number of runtime alignment checks is within reason. */
1830 do_versioning =
1831 flag_tree_vect_loop_version
1832 && optimize_loop_nest_for_speed_p (loop)
1833 && (!loop->inner); /* FORNOW */
1835 if (do_versioning)
1837 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1839 stmt = DR_STMT (dr);
1840 stmt_info = vinfo_for_stmt (stmt);
1842 /* For interleaving, only the alignment of the first access
1843 matters. */
1844 if (aligned_access_p (dr)
1845 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1846 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1847 continue;
1849 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1851 if (!supportable_dr_alignment)
1853 gimple stmt;
1854 int mask;
1855 tree vectype;
1857 if (known_alignment_for_access_p (dr)
1858 || VEC_length (gimple,
1859 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1860 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1862 do_versioning = false;
1863 break;
1866 stmt = DR_STMT (dr);
1867 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1868 gcc_assert (vectype);
1870 /* The rightmost bits of an aligned address must be zeros.
1871 Construct the mask needed for this test. For example,
1872 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1873 mask must be 15 = 0xf. */
1874 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1876 /* FORNOW: use the same mask to test all potentially unaligned
1877 references in the loop. The vectorizer currently supports
1878 a single vector size, see the reference to
1879 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1880 vectorization factor is computed. */
1881 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1882 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1883 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1884 VEC_safe_push (gimple, heap,
1885 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1886 DR_STMT (dr));
1890 /* Versioning requires at least one misaligned data reference. */
1891 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1892 do_versioning = false;
1893 else if (!do_versioning)
1894 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1897 if (do_versioning)
1899 VEC(gimple,heap) *may_misalign_stmts
1900 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1901 gimple stmt;
1903 /* It can now be assumed that the data references in the statements
1904 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1905 of the loop being vectorized. */
1906 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1908 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1909 dr = STMT_VINFO_DATA_REF (stmt_info);
1910 SET_DR_MISALIGNMENT (dr, 0);
1911 if (vect_print_dump_info (REPORT_ALIGNMENT))
1912 fprintf (vect_dump, "Alignment of access forced using versioning.");
1915 if (vect_print_dump_info (REPORT_DETAILS))
1916 fprintf (vect_dump, "Versioning for alignment will be applied.");
1918 /* Peeling and versioning can't be done together at this time. */
1919 gcc_assert (! (do_peeling && do_versioning));
1921 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1922 gcc_assert (stat);
1923 return stat;
1926 /* This point is reached if neither peeling nor versioning is being done. */
1927 gcc_assert (! (do_peeling || do_versioning));
1929 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1930 return stat;
1934 /* Function vect_find_same_alignment_drs.
1936 Update group and alignment relations according to the chosen
1937 vectorization factor. */
1939 static void
1940 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1941 loop_vec_info loop_vinfo)
1943 unsigned int i;
1944 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1945 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1946 struct data_reference *dra = DDR_A (ddr);
1947 struct data_reference *drb = DDR_B (ddr);
1948 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1949 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1950 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1951 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1952 lambda_vector dist_v;
1953 unsigned int loop_depth;
1955 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1956 return;
1958 if (dra == drb)
1959 return;
1961 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1962 return;
1964 /* Loop-based vectorization and known data dependence. */
1965 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1966 return;
1968 /* Data-dependence analysis reports a distance vector of zero
1969 for data-references that overlap only in the first iteration
1970 but have different sign step (see PR45764).
1971 So as a sanity check require equal DR_STEP. */
1972 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1973 return;
1975 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1976 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1978 int dist = dist_v[loop_depth];
1980 if (vect_print_dump_info (REPORT_DR_DETAILS))
1981 fprintf (vect_dump, "dependence distance = %d.", dist);
1983 /* Same loop iteration. */
1984 if (dist == 0
1985 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1987 /* Two references with distance zero have the same alignment. */
1988 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1989 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1990 if (vect_print_dump_info (REPORT_ALIGNMENT))
1991 fprintf (vect_dump, "accesses have the same alignment.");
1992 if (vect_print_dump_info (REPORT_DR_DETAILS))
1994 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1995 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1996 fprintf (vect_dump, " and ");
1997 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
2004 /* Function vect_analyze_data_refs_alignment
2006 Analyze the alignment of the data-references in the loop.
2007 Return FALSE if a data reference is found that cannot be vectorized. */
2009 bool
2010 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2011 bb_vec_info bb_vinfo)
2013 if (vect_print_dump_info (REPORT_DETAILS))
2014 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2016 /* Mark groups of data references with same alignment using
2017 data dependence information. */
2018 if (loop_vinfo)
2020 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2021 struct data_dependence_relation *ddr;
2022 unsigned int i;
2024 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2025 vect_find_same_alignment_drs (ddr, loop_vinfo);
2028 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2030 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2031 fprintf (vect_dump,
2032 "not vectorized: can't calculate alignment for data ref.");
2033 return false;
2036 return true;
2040 /* Analyze groups of strided accesses: check that DR belongs to a group of
2041 strided accesses of legal size, step, etc. Detect gaps, single element
2042 interleaving, and other special cases. Set strided access info.
2043 Collect groups of strided stores for further use in SLP analysis. */
2045 static bool
2046 vect_analyze_group_access (struct data_reference *dr)
2048 tree step = DR_STEP (dr);
2049 tree scalar_type = TREE_TYPE (DR_REF (dr));
2050 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2051 gimple stmt = DR_STMT (dr);
2052 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2053 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2054 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2055 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2056 HOST_WIDE_INT stride, last_accessed_element = 1;
2057 bool slp_impossible = false;
2059 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2060 interleaving group (including gaps). */
2061 stride = dr_step / type_size;
2063 /* Not consecutive access is possible only if it is a part of interleaving. */
2064 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2066 /* Check if it this DR is a part of interleaving, and is a single
2067 element of the group that is accessed in the loop. */
2069 /* Gaps are supported only for loads. STEP must be a multiple of the type
2070 size. The size of the group must be a power of 2. */
2071 if (DR_IS_READ (dr)
2072 && (dr_step % type_size) == 0
2073 && stride > 0
2074 && exact_log2 (stride) != -1)
2076 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2077 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2078 if (vect_print_dump_info (REPORT_DR_DETAILS))
2080 fprintf (vect_dump, "Detected single element interleaving ");
2081 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2082 fprintf (vect_dump, " step ");
2083 print_generic_expr (vect_dump, step, TDF_SLIM);
2086 if (loop_vinfo)
2088 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2090 if (vect_print_dump_info (REPORT_DETAILS))
2091 fprintf (vect_dump, "Data access with gaps requires scalar "
2092 "epilogue loop");
2095 return true;
2098 if (vect_print_dump_info (REPORT_DETAILS))
2100 fprintf (vect_dump, "not consecutive access ");
2101 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2104 if (bb_vinfo)
2106 /* Mark the statement as unvectorizable. */
2107 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2108 return true;
2111 return false;
2114 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2116 /* First stmt in the interleaving chain. Check the chain. */
2117 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2118 struct data_reference *data_ref = dr;
2119 unsigned int count = 1;
2120 tree next_step;
2121 tree prev_init = DR_INIT (data_ref);
2122 gimple prev = stmt;
2123 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2125 while (next)
2127 /* Skip same data-refs. In case that two or more stmts share
2128 data-ref (supported only for loads), we vectorize only the first
2129 stmt, and the rest get their vectorized loads from the first
2130 one. */
2131 if (!tree_int_cst_compare (DR_INIT (data_ref),
2132 DR_INIT (STMT_VINFO_DATA_REF (
2133 vinfo_for_stmt (next)))))
2135 if (DR_IS_WRITE (data_ref))
2137 if (vect_print_dump_info (REPORT_DETAILS))
2138 fprintf (vect_dump, "Two store stmts share the same dr.");
2139 return false;
2142 /* Check that there is no load-store dependencies for this loads
2143 to prevent a case of load-store-load to the same location. */
2144 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2145 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2147 if (vect_print_dump_info (REPORT_DETAILS))
2148 fprintf (vect_dump,
2149 "READ_WRITE dependence in interleaving.");
2150 return false;
2153 /* For load use the same data-ref load. */
2154 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2156 prev = next;
2157 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2158 continue;
2161 prev = next;
2163 /* Check that all the accesses have the same STEP. */
2164 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2165 if (tree_int_cst_compare (step, next_step))
2167 if (vect_print_dump_info (REPORT_DETAILS))
2168 fprintf (vect_dump, "not consecutive access in interleaving");
2169 return false;
2172 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2173 /* Check that the distance between two accesses is equal to the type
2174 size. Otherwise, we have gaps. */
2175 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2176 - TREE_INT_CST_LOW (prev_init)) / type_size;
2177 if (diff != 1)
2179 /* FORNOW: SLP of accesses with gaps is not supported. */
2180 slp_impossible = true;
2181 if (DR_IS_WRITE (data_ref))
2183 if (vect_print_dump_info (REPORT_DETAILS))
2184 fprintf (vect_dump, "interleaved store with gaps");
2185 return false;
2188 gaps += diff - 1;
2191 last_accessed_element += diff;
2193 /* Store the gap from the previous member of the group. If there is no
2194 gap in the access, GROUP_GAP is always 1. */
2195 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2197 prev_init = DR_INIT (data_ref);
2198 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2199 /* Count the number of data-refs in the chain. */
2200 count++;
2203 /* COUNT is the number of accesses found, we multiply it by the size of
2204 the type to get COUNT_IN_BYTES. */
2205 count_in_bytes = type_size * count;
2207 /* Check that the size of the interleaving (including gaps) is not
2208 greater than STEP. */
2209 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2211 if (vect_print_dump_info (REPORT_DETAILS))
2213 fprintf (vect_dump, "interleaving size is greater than step for ");
2214 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2216 return false;
2219 /* Check that the size of the interleaving is equal to STEP for stores,
2220 i.e., that there are no gaps. */
2221 if (dr_step && dr_step != count_in_bytes)
2223 if (DR_IS_READ (dr))
2225 slp_impossible = true;
2226 /* There is a gap after the last load in the group. This gap is a
2227 difference between the stride and the number of elements. When
2228 there is no gap, this difference should be 0. */
2229 GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2231 else
2233 if (vect_print_dump_info (REPORT_DETAILS))
2234 fprintf (vect_dump, "interleaved store with gaps");
2235 return false;
2239 /* Check that STEP is a multiple of type size. */
2240 if (dr_step && (dr_step % type_size) != 0)
2242 if (vect_print_dump_info (REPORT_DETAILS))
2244 fprintf (vect_dump, "step is not a multiple of type size: step ");
2245 print_generic_expr (vect_dump, step, TDF_SLIM);
2246 fprintf (vect_dump, " size ");
2247 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2248 TDF_SLIM);
2250 return false;
2253 if (stride == 0)
2254 stride = count;
2256 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2257 if (vect_print_dump_info (REPORT_DETAILS))
2258 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2260 /* SLP: create an SLP data structure for every interleaving group of
2261 stores for further analysis in vect_analyse_slp. */
2262 if (DR_IS_WRITE (dr) && !slp_impossible)
2264 if (loop_vinfo)
2265 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2266 stmt);
2267 if (bb_vinfo)
2268 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2269 stmt);
2272 /* There is a gap in the end of the group. */
2273 if (stride - last_accessed_element > 0 && loop_vinfo)
2275 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2276 if (vect_print_dump_info (REPORT_DETAILS))
2277 fprintf (vect_dump, "Data access with gaps requires scalar "
2278 "epilogue loop");
2282 return true;
2286 /* Analyze the access pattern of the data-reference DR.
2287 In case of non-consecutive accesses call vect_analyze_group_access() to
2288 analyze groups of strided accesses. */
2290 static bool
2291 vect_analyze_data_ref_access (struct data_reference *dr)
2293 tree step = DR_STEP (dr);
2294 tree scalar_type = TREE_TYPE (DR_REF (dr));
2295 gimple stmt = DR_STMT (dr);
2296 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2297 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2298 struct loop *loop = NULL;
2299 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2301 if (loop_vinfo)
2302 loop = LOOP_VINFO_LOOP (loop_vinfo);
2304 if (loop_vinfo && !step)
2306 if (vect_print_dump_info (REPORT_DETAILS))
2307 fprintf (vect_dump, "bad data-ref access in loop");
2308 return false;
2311 /* Allow invariant loads in loops. */
2312 if (loop_vinfo && dr_step == 0)
2314 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2315 return DR_IS_READ (dr);
2318 if (loop && nested_in_vect_loop_p (loop, stmt))
2320 /* Interleaved accesses are not yet supported within outer-loop
2321 vectorization for references in the inner-loop. */
2322 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2324 /* For the rest of the analysis we use the outer-loop step. */
2325 step = STMT_VINFO_DR_STEP (stmt_info);
2326 dr_step = TREE_INT_CST_LOW (step);
2328 if (dr_step == 0)
2330 if (vect_print_dump_info (REPORT_ALIGNMENT))
2331 fprintf (vect_dump, "zero step in outer loop.");
2332 if (DR_IS_READ (dr))
2333 return true;
2334 else
2335 return false;
2339 /* Consecutive? */
2340 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2341 || (dr_step < 0
2342 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2344 /* Mark that it is not interleaving. */
2345 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2346 return true;
2349 if (loop && nested_in_vect_loop_p (loop, stmt))
2351 if (vect_print_dump_info (REPORT_ALIGNMENT))
2352 fprintf (vect_dump, "strided access in outer loop.");
2353 return false;
2356 /* Not consecutive access - check if it's a part of interleaving group. */
2357 return vect_analyze_group_access (dr);
2361 /* Function vect_analyze_data_ref_accesses.
2363 Analyze the access pattern of all the data references in the loop.
2365 FORNOW: the only access pattern that is considered vectorizable is a
2366 simple step 1 (consecutive) access.
2368 FORNOW: handle only arrays and pointer accesses. */
2370 bool
2371 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2373 unsigned int i;
2374 VEC (data_reference_p, heap) *datarefs;
2375 struct data_reference *dr;
2377 if (vect_print_dump_info (REPORT_DETAILS))
2378 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2380 if (loop_vinfo)
2381 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2382 else
2383 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2385 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2386 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2387 && !vect_analyze_data_ref_access (dr))
2389 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2390 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2392 if (bb_vinfo)
2394 /* Mark the statement as not vectorizable. */
2395 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2396 continue;
2398 else
2399 return false;
2402 return true;
2405 /* Function vect_prune_runtime_alias_test_list.
2407 Prune a list of ddrs to be tested at run-time by versioning for alias.
2408 Return FALSE if resulting list of ddrs is longer then allowed by
2409 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2411 bool
2412 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2414 VEC (ddr_p, heap) * ddrs =
2415 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2416 unsigned i, j;
2418 if (vect_print_dump_info (REPORT_DETAILS))
2419 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2421 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2423 bool found;
2424 ddr_p ddr_i;
2426 ddr_i = VEC_index (ddr_p, ddrs, i);
2427 found = false;
2429 for (j = 0; j < i; j++)
2431 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2433 if (vect_vfa_range_equal (ddr_i, ddr_j))
2435 if (vect_print_dump_info (REPORT_DR_DETAILS))
2437 fprintf (vect_dump, "found equal ranges ");
2438 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2439 fprintf (vect_dump, ", ");
2440 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2441 fprintf (vect_dump, " and ");
2442 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2443 fprintf (vect_dump, ", ");
2444 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2446 found = true;
2447 break;
2451 if (found)
2453 VEC_ordered_remove (ddr_p, ddrs, i);
2454 continue;
2456 i++;
2459 if (VEC_length (ddr_p, ddrs) >
2460 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2462 if (vect_print_dump_info (REPORT_DR_DETAILS))
2464 fprintf (vect_dump,
2465 "disable versioning for alias - max number of generated "
2466 "checks exceeded.");
2469 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2471 return false;
2474 return true;
2478 /* Function vect_analyze_data_refs.
2480 Find all the data references in the loop or basic block.
2482 The general structure of the analysis of data refs in the vectorizer is as
2483 follows:
2484 1- vect_analyze_data_refs(loop/bb): call
2485 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2486 in the loop/bb and their dependences.
2487 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2488 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2489 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2493 bool
2494 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2495 bb_vec_info bb_vinfo,
2496 int *min_vf)
2498 struct loop *loop = NULL;
2499 basic_block bb = NULL;
2500 unsigned int i;
2501 VEC (data_reference_p, heap) *datarefs;
2502 struct data_reference *dr;
2503 tree scalar_type;
2504 bool res;
2506 if (vect_print_dump_info (REPORT_DETAILS))
2507 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2509 if (loop_vinfo)
2511 loop = LOOP_VINFO_LOOP (loop_vinfo);
2512 res = compute_data_dependences_for_loop
2513 (loop, true,
2514 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2515 &LOOP_VINFO_DATAREFS (loop_vinfo),
2516 &LOOP_VINFO_DDRS (loop_vinfo));
2518 if (!res)
2520 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2521 fprintf (vect_dump, "not vectorized: loop contains function calls"
2522 " or data references that cannot be analyzed");
2523 return false;
2526 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2528 else
2530 bb = BB_VINFO_BB (bb_vinfo);
2531 res = compute_data_dependences_for_bb (bb, true,
2532 &BB_VINFO_DATAREFS (bb_vinfo),
2533 &BB_VINFO_DDRS (bb_vinfo));
2534 if (!res)
2536 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2537 fprintf (vect_dump, "not vectorized: basic block contains function"
2538 " calls or data references that cannot be analyzed");
2539 return false;
2542 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2545 /* Go through the data-refs, check that the analysis succeeded. Update
2546 pointer from stmt_vec_info struct to DR and vectype. */
2548 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2550 gimple stmt;
2551 stmt_vec_info stmt_info;
2552 tree base, offset, init;
2553 int vf;
2555 if (!dr || !DR_REF (dr))
2557 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2558 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2559 return false;
2562 stmt = DR_STMT (dr);
2563 stmt_info = vinfo_for_stmt (stmt);
2565 /* Check that analysis of the data-ref succeeded. */
2566 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2567 || !DR_STEP (dr))
2569 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2571 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2572 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2575 if (bb_vinfo)
2577 /* Mark the statement as not vectorizable. */
2578 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2579 continue;
2581 else
2582 return false;
2585 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2587 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2588 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2589 "constant");
2590 if (bb_vinfo)
2592 /* Mark the statement as not vectorizable. */
2593 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2594 continue;
2596 else
2597 return false;
2600 if (TREE_THIS_VOLATILE (DR_REF (dr)))
2602 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2604 fprintf (vect_dump, "not vectorized: volatile type ");
2605 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2607 return false;
2610 base = unshare_expr (DR_BASE_ADDRESS (dr));
2611 offset = unshare_expr (DR_OFFSET (dr));
2612 init = unshare_expr (DR_INIT (dr));
2614 if (stmt_can_throw_internal (stmt))
2616 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2618 fprintf (vect_dump, "not vectorized: statement can throw an "
2619 "exception ");
2620 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2622 return false;
2625 /* Update DR field in stmt_vec_info struct. */
2627 /* If the dataref is in an inner-loop of the loop that is considered for
2628 for vectorization, we also want to analyze the access relative to
2629 the outer-loop (DR contains information only relative to the
2630 inner-most enclosing loop). We do that by building a reference to the
2631 first location accessed by the inner-loop, and analyze it relative to
2632 the outer-loop. */
2633 if (loop && nested_in_vect_loop_p (loop, stmt))
2635 tree outer_step, outer_base, outer_init;
2636 HOST_WIDE_INT pbitsize, pbitpos;
2637 tree poffset;
2638 enum machine_mode pmode;
2639 int punsignedp, pvolatilep;
2640 affine_iv base_iv, offset_iv;
2641 tree dinit;
2643 /* Build a reference to the first location accessed by the
2644 inner-loop: *(BASE+INIT). (The first location is actually
2645 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2646 tree inner_base = build_fold_indirect_ref
2647 (fold_build_pointer_plus (base, init));
2649 if (vect_print_dump_info (REPORT_DETAILS))
2651 fprintf (vect_dump, "analyze in outer-loop: ");
2652 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2655 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2656 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2657 gcc_assert (outer_base != NULL_TREE);
2659 if (pbitpos % BITS_PER_UNIT != 0)
2661 if (vect_print_dump_info (REPORT_DETAILS))
2662 fprintf (vect_dump, "failed: bit offset alignment.\n");
2663 return false;
2666 outer_base = build_fold_addr_expr (outer_base);
2667 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2668 &base_iv, false))
2670 if (vect_print_dump_info (REPORT_DETAILS))
2671 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2672 return false;
2675 if (offset)
2677 if (poffset)
2678 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2679 poffset);
2680 else
2681 poffset = offset;
2684 if (!poffset)
2686 offset_iv.base = ssize_int (0);
2687 offset_iv.step = ssize_int (0);
2689 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2690 &offset_iv, false))
2692 if (vect_print_dump_info (REPORT_DETAILS))
2693 fprintf (vect_dump, "evolution of offset is not affine.\n");
2694 return false;
2697 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2698 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2699 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2700 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2701 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2703 outer_step = size_binop (PLUS_EXPR,
2704 fold_convert (ssizetype, base_iv.step),
2705 fold_convert (ssizetype, offset_iv.step));
2707 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2708 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2709 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2710 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2711 STMT_VINFO_DR_OFFSET (stmt_info) =
2712 fold_convert (ssizetype, offset_iv.base);
2713 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2714 size_int (highest_pow2_factor (offset_iv.base));
2716 if (vect_print_dump_info (REPORT_DETAILS))
2718 fprintf (vect_dump, "\touter base_address: ");
2719 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2720 fprintf (vect_dump, "\n\touter offset from base address: ");
2721 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2722 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2723 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2724 fprintf (vect_dump, "\n\touter step: ");
2725 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2726 fprintf (vect_dump, "\n\touter aligned to: ");
2727 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2731 if (STMT_VINFO_DATA_REF (stmt_info))
2733 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2735 fprintf (vect_dump,
2736 "not vectorized: more than one data ref in stmt: ");
2737 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2739 return false;
2742 STMT_VINFO_DATA_REF (stmt_info) = dr;
2744 /* Set vectype for STMT. */
2745 scalar_type = TREE_TYPE (DR_REF (dr));
2746 STMT_VINFO_VECTYPE (stmt_info) =
2747 get_vectype_for_scalar_type (scalar_type);
2748 if (!STMT_VINFO_VECTYPE (stmt_info))
2750 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2752 fprintf (vect_dump,
2753 "not vectorized: no vectype for stmt: ");
2754 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2755 fprintf (vect_dump, " scalar_type: ");
2756 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2759 if (bb_vinfo)
2761 /* Mark the statement as not vectorizable. */
2762 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2763 continue;
2765 else
2766 return false;
2769 /* Adjust the minimal vectorization factor according to the
2770 vector type. */
2771 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2772 if (vf > *min_vf)
2773 *min_vf = vf;
2776 return true;
2780 /* Function vect_get_new_vect_var.
2782 Returns a name for a new variable. The current naming scheme appends the
2783 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
2784 the name of vectorizer generated variables, and appends that to NAME if
2785 provided. */
2787 tree
2788 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2790 const char *prefix;
2791 tree new_vect_var;
2793 switch (var_kind)
2795 case vect_simple_var:
2796 prefix = "vect_";
2797 break;
2798 case vect_scalar_var:
2799 prefix = "stmp_";
2800 break;
2801 case vect_pointer_var:
2802 prefix = "vect_p";
2803 break;
2804 default:
2805 gcc_unreachable ();
2808 if (name)
2810 char* tmp = concat (prefix, name, NULL);
2811 new_vect_var = create_tmp_var (type, tmp);
2812 free (tmp);
2814 else
2815 new_vect_var = create_tmp_var (type, prefix);
2817 /* Mark vector typed variable as a gimple register variable. */
2818 if (TREE_CODE (type) == VECTOR_TYPE)
2819 DECL_GIMPLE_REG_P (new_vect_var) = true;
2821 return new_vect_var;
2825 /* Function vect_create_addr_base_for_vector_ref.
2827 Create an expression that computes the address of the first memory location
2828 that will be accessed for a data reference.
2830 Input:
2831 STMT: The statement containing the data reference.
2832 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2833 OFFSET: Optional. If supplied, it is be added to the initial address.
2834 LOOP: Specify relative to which loop-nest should the address be computed.
2835 For example, when the dataref is in an inner-loop nested in an
2836 outer-loop that is now being vectorized, LOOP can be either the
2837 outer-loop, or the inner-loop. The first memory location accessed
2838 by the following dataref ('in' points to short):
2840 for (i=0; i<N; i++)
2841 for (j=0; j<M; j++)
2842 s += in[i+j]
2844 is as follows:
2845 if LOOP=i_loop: &in (relative to i_loop)
2846 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2848 Output:
2849 1. Return an SSA_NAME whose value is the address of the memory location of
2850 the first vector of the data reference.
2851 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2852 these statement(s) which define the returned SSA_NAME.
2854 FORNOW: We are only handling array accesses with step 1. */
2856 tree
2857 vect_create_addr_base_for_vector_ref (gimple stmt,
2858 gimple_seq *new_stmt_list,
2859 tree offset,
2860 struct loop *loop)
2862 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2863 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2864 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2865 tree base_name;
2866 tree data_ref_base_var;
2867 tree vec_stmt;
2868 tree addr_base, addr_expr;
2869 tree dest;
2870 gimple_seq seq = NULL;
2871 tree base_offset = unshare_expr (DR_OFFSET (dr));
2872 tree init = unshare_expr (DR_INIT (dr));
2873 tree vect_ptr_type;
2874 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2875 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2876 tree base;
2878 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2880 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2882 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2884 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2885 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2886 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2889 if (loop_vinfo)
2890 base_name = build_fold_indirect_ref (data_ref_base);
2891 else
2893 base_offset = ssize_int (0);
2894 init = ssize_int (0);
2895 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2898 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2899 add_referenced_var (data_ref_base_var);
2900 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2901 data_ref_base_var);
2902 gimple_seq_add_seq (new_stmt_list, seq);
2904 /* Create base_offset */
2905 base_offset = size_binop (PLUS_EXPR,
2906 fold_convert (sizetype, base_offset),
2907 fold_convert (sizetype, init));
2908 dest = create_tmp_var (sizetype, "base_off");
2909 add_referenced_var (dest);
2910 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2911 gimple_seq_add_seq (new_stmt_list, seq);
2913 if (offset)
2915 tree tmp = create_tmp_var (sizetype, "offset");
2917 add_referenced_var (tmp);
2918 offset = fold_build2 (MULT_EXPR, sizetype,
2919 fold_convert (sizetype, offset), step);
2920 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2921 base_offset, offset);
2922 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2923 gimple_seq_add_seq (new_stmt_list, seq);
2926 /* base + base_offset */
2927 if (loop_vinfo)
2928 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
2929 else
2931 addr_base = build1 (ADDR_EXPR,
2932 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2933 unshare_expr (DR_REF (dr)));
2936 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2937 base = get_base_address (DR_REF (dr));
2938 if (base
2939 && TREE_CODE (base) == MEM_REF)
2940 vect_ptr_type
2941 = build_qualified_type (vect_ptr_type,
2942 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2944 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2945 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2946 get_name (base_name));
2947 add_referenced_var (addr_expr);
2948 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2949 gimple_seq_add_seq (new_stmt_list, seq);
2951 if (DR_PTR_INFO (dr)
2952 && TREE_CODE (vec_stmt) == SSA_NAME)
2954 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2955 if (offset)
2957 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2958 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2962 if (vect_print_dump_info (REPORT_DETAILS))
2964 fprintf (vect_dump, "created ");
2965 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2968 return vec_stmt;
2972 /* Function vect_create_data_ref_ptr.
2974 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
2975 location accessed in the loop by STMT, along with the def-use update
2976 chain to appropriately advance the pointer through the loop iterations.
2977 Also set aliasing information for the pointer. This pointer is used by
2978 the callers to this function to create a memory reference expression for
2979 vector load/store access.
2981 Input:
2982 1. STMT: a stmt that references memory. Expected to be of the form
2983 GIMPLE_ASSIGN <name, data-ref> or
2984 GIMPLE_ASSIGN <data-ref, name>.
2985 2. AGGR_TYPE: the type of the reference, which should be either a vector
2986 or an array.
2987 3. AT_LOOP: the loop where the vector memref is to be created.
2988 4. OFFSET (optional): an offset to be added to the initial address accessed
2989 by the data-ref in STMT.
2990 5. BSI: location where the new stmts are to be placed if there is no loop
2991 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
2992 pointing to the initial address.
2994 Output:
2995 1. Declare a new ptr to vector_type, and have it point to the base of the
2996 data reference (initial addressed accessed by the data reference).
2997 For example, for vector of type V8HI, the following code is generated:
2999 v8hi *ap;
3000 ap = (v8hi *)initial_address;
3002 if OFFSET is not supplied:
3003 initial_address = &a[init];
3004 if OFFSET is supplied:
3005 initial_address = &a[init + OFFSET];
3007 Return the initial_address in INITIAL_ADDRESS.
3009 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3010 update the pointer in each iteration of the loop.
3012 Return the increment stmt that updates the pointer in PTR_INCR.
3014 3. Set INV_P to true if the access pattern of the data reference in the
3015 vectorized loop is invariant. Set it to false otherwise.
3017 4. Return the pointer. */
3019 tree
3020 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3021 tree offset, tree *initial_address,
3022 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3023 bool only_init, bool *inv_p)
3025 tree base_name;
3026 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3027 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3028 struct loop *loop = NULL;
3029 bool nested_in_vect_loop = false;
3030 struct loop *containing_loop = NULL;
3031 tree aggr_ptr_type;
3032 tree aggr_ptr;
3033 tree new_temp;
3034 gimple vec_stmt;
3035 gimple_seq new_stmt_list = NULL;
3036 edge pe = NULL;
3037 basic_block new_bb;
3038 tree aggr_ptr_init;
3039 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3040 tree aptr;
3041 gimple_stmt_iterator incr_gsi;
3042 bool insert_after;
3043 bool negative;
3044 tree indx_before_incr, indx_after_incr;
3045 gimple incr;
3046 tree step;
3047 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3048 tree base;
3050 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3051 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3053 if (loop_vinfo)
3055 loop = LOOP_VINFO_LOOP (loop_vinfo);
3056 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3057 containing_loop = (gimple_bb (stmt))->loop_father;
3058 pe = loop_preheader_edge (loop);
3060 else
3062 gcc_assert (bb_vinfo);
3063 only_init = true;
3064 *ptr_incr = NULL;
3067 /* Check the step (evolution) of the load in LOOP, and record
3068 whether it's invariant. */
3069 if (nested_in_vect_loop)
3070 step = STMT_VINFO_DR_STEP (stmt_info);
3071 else
3072 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3074 if (tree_int_cst_compare (step, size_zero_node) == 0)
3075 *inv_p = true;
3076 else
3077 *inv_p = false;
3078 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3080 /* Create an expression for the first address accessed by this load
3081 in LOOP. */
3082 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3084 if (vect_print_dump_info (REPORT_DETAILS))
3086 tree data_ref_base = base_name;
3087 fprintf (vect_dump, "create %s-pointer variable to type: ",
3088 tree_code_name[(int) TREE_CODE (aggr_type)]);
3089 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3090 if (TREE_CODE (data_ref_base) == VAR_DECL
3091 || TREE_CODE (data_ref_base) == ARRAY_REF)
3092 fprintf (vect_dump, " vectorizing an array ref: ");
3093 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3094 fprintf (vect_dump, " vectorizing a record based array ref: ");
3095 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3096 fprintf (vect_dump, " vectorizing a pointer ref: ");
3097 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3100 /* (1) Create the new aggregate-pointer variable. */
3101 aggr_ptr_type = build_pointer_type (aggr_type);
3102 base = get_base_address (DR_REF (dr));
3103 if (base
3104 && TREE_CODE (base) == MEM_REF)
3105 aggr_ptr_type
3106 = build_qualified_type (aggr_ptr_type,
3107 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3108 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3109 get_name (base_name));
3111 /* Vector and array types inherit the alias set of their component
3112 type by default so we need to use a ref-all pointer if the data
3113 reference does not conflict with the created aggregated data
3114 reference because it is not addressable. */
3115 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3116 get_alias_set (DR_REF (dr))))
3118 aggr_ptr_type
3119 = build_pointer_type_for_mode (aggr_type,
3120 TYPE_MODE (aggr_ptr_type), true);
3121 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3122 get_name (base_name));
3125 /* Likewise for any of the data references in the stmt group. */
3126 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3128 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3131 tree lhs = gimple_assign_lhs (orig_stmt);
3132 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3133 get_alias_set (lhs)))
3135 aggr_ptr_type
3136 = build_pointer_type_for_mode (aggr_type,
3137 TYPE_MODE (aggr_ptr_type), true);
3138 aggr_ptr
3139 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3140 get_name (base_name));
3141 break;
3144 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3146 while (orig_stmt);
3149 add_referenced_var (aggr_ptr);
3151 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3152 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3153 def-use update cycles for the pointer: one relative to the outer-loop
3154 (LOOP), which is what steps (3) and (4) below do. The other is relative
3155 to the inner-loop (which is the inner-most loop containing the dataref),
3156 and this is done be step (5) below.
3158 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3159 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3160 redundant. Steps (3),(4) create the following:
3162 vp0 = &base_addr;
3163 LOOP: vp1 = phi(vp0,vp2)
3166 vp2 = vp1 + step
3167 goto LOOP
3169 If there is an inner-loop nested in loop, then step (5) will also be
3170 applied, and an additional update in the inner-loop will be created:
3172 vp0 = &base_addr;
3173 LOOP: vp1 = phi(vp0,vp2)
3175 inner: vp3 = phi(vp1,vp4)
3176 vp4 = vp3 + inner_step
3177 if () goto inner
3179 vp2 = vp1 + step
3180 if () goto LOOP */
3182 /* (2) Calculate the initial address of the aggregate-pointer, and set
3183 the aggregate-pointer to point to it before the loop. */
3185 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3187 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3188 offset, loop);
3189 if (new_stmt_list)
3191 if (pe)
3193 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3194 gcc_assert (!new_bb);
3196 else
3197 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3200 *initial_address = new_temp;
3202 /* Create: p = (aggr_type *) initial_base */
3203 if (TREE_CODE (new_temp) != SSA_NAME
3204 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3206 vec_stmt = gimple_build_assign (aggr_ptr,
3207 fold_convert (aggr_ptr_type, new_temp));
3208 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3209 /* Copy the points-to information if it exists. */
3210 if (DR_PTR_INFO (dr))
3211 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3212 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3213 if (pe)
3215 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3216 gcc_assert (!new_bb);
3218 else
3219 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3221 else
3222 aggr_ptr_init = new_temp;
3224 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3225 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3226 inner-loop nested in LOOP (during outer-loop vectorization). */
3228 /* No update in loop is required. */
3229 if (only_init && (!loop_vinfo || at_loop == loop))
3230 aptr = aggr_ptr_init;
3231 else
3233 /* The step of the aggregate pointer is the type size. */
3234 tree step = TYPE_SIZE_UNIT (aggr_type);
3235 /* One exception to the above is when the scalar step of the load in
3236 LOOP is zero. In this case the step here is also zero. */
3237 if (*inv_p)
3238 step = size_zero_node;
3239 else if (negative)
3240 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3242 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3244 create_iv (aggr_ptr_init,
3245 fold_convert (aggr_ptr_type, step),
3246 aggr_ptr, loop, &incr_gsi, insert_after,
3247 &indx_before_incr, &indx_after_incr);
3248 incr = gsi_stmt (incr_gsi);
3249 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3251 /* Copy the points-to information if it exists. */
3252 if (DR_PTR_INFO (dr))
3254 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3255 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3257 if (ptr_incr)
3258 *ptr_incr = incr;
3260 aptr = indx_before_incr;
3263 if (!nested_in_vect_loop || only_init)
3264 return aptr;
3267 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3268 nested in LOOP, if exists. */
3270 gcc_assert (nested_in_vect_loop);
3271 if (!only_init)
3273 standard_iv_increment_position (containing_loop, &incr_gsi,
3274 &insert_after);
3275 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3276 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3277 &indx_after_incr);
3278 incr = gsi_stmt (incr_gsi);
3279 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3281 /* Copy the points-to information if it exists. */
3282 if (DR_PTR_INFO (dr))
3284 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3285 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3287 if (ptr_incr)
3288 *ptr_incr = incr;
3290 return indx_before_incr;
3292 else
3293 gcc_unreachable ();
3297 /* Function bump_vector_ptr
3299 Increment a pointer (to a vector type) by vector-size. If requested,
3300 i.e. if PTR-INCR is given, then also connect the new increment stmt
3301 to the existing def-use update-chain of the pointer, by modifying
3302 the PTR_INCR as illustrated below:
3304 The pointer def-use update-chain before this function:
3305 DATAREF_PTR = phi (p_0, p_2)
3306 ....
3307 PTR_INCR: p_2 = DATAREF_PTR + step
3309 The pointer def-use update-chain after this function:
3310 DATAREF_PTR = phi (p_0, p_2)
3311 ....
3312 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3313 ....
3314 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3316 Input:
3317 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3318 in the loop.
3319 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3320 the loop. The increment amount across iterations is expected
3321 to be vector_size.
3322 BSI - location where the new update stmt is to be placed.
3323 STMT - the original scalar memory-access stmt that is being vectorized.
3324 BUMP - optional. The offset by which to bump the pointer. If not given,
3325 the offset is assumed to be vector_size.
3327 Output: Return NEW_DATAREF_PTR as illustrated above.
3331 tree
3332 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3333 gimple stmt, tree bump)
3335 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3336 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3337 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3338 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3339 tree update = TYPE_SIZE_UNIT (vectype);
3340 gimple incr_stmt;
3341 ssa_op_iter iter;
3342 use_operand_p use_p;
3343 tree new_dataref_ptr;
3345 if (bump)
3346 update = bump;
3348 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3349 dataref_ptr, update);
3350 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3351 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3352 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3354 /* Copy the points-to information if it exists. */
3355 if (DR_PTR_INFO (dr))
3357 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3358 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3359 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3362 if (!ptr_incr)
3363 return new_dataref_ptr;
3365 /* Update the vector-pointer's cross-iteration increment. */
3366 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3368 tree use = USE_FROM_PTR (use_p);
3370 if (use == dataref_ptr)
3371 SET_USE (use_p, new_dataref_ptr);
3372 else
3373 gcc_assert (tree_int_cst_compare (use, update) == 0);
3376 return new_dataref_ptr;
3380 /* Function vect_create_destination_var.
3382 Create a new temporary of type VECTYPE. */
3384 tree
3385 vect_create_destination_var (tree scalar_dest, tree vectype)
3387 tree vec_dest;
3388 const char *new_name;
3389 tree type;
3390 enum vect_var_kind kind;
3392 kind = vectype ? vect_simple_var : vect_scalar_var;
3393 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3395 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3397 new_name = get_name (scalar_dest);
3398 if (!new_name)
3399 new_name = "var_";
3400 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3401 add_referenced_var (vec_dest);
3403 return vec_dest;
3406 /* Function vect_strided_store_supported.
3408 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3409 and FALSE otherwise. */
3411 bool
3412 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3414 optab interleave_high_optab, interleave_low_optab;
3415 enum machine_mode mode;
3417 mode = TYPE_MODE (vectype);
3419 /* vect_permute_store_chain requires the group size to be a power of two. */
3420 if (exact_log2 (count) == -1)
3422 if (vect_print_dump_info (REPORT_DETAILS))
3423 fprintf (vect_dump, "the size of the group of strided accesses"
3424 " is not a power of 2");
3425 return false;
3428 /* Check that the operation is supported. */
3429 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3430 vectype, optab_default);
3431 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3432 vectype, optab_default);
3433 if (!interleave_high_optab || !interleave_low_optab)
3435 if (vect_print_dump_info (REPORT_DETAILS))
3436 fprintf (vect_dump, "no optab for interleave.");
3437 return false;
3440 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3441 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3443 if (vect_print_dump_info (REPORT_DETAILS))
3444 fprintf (vect_dump, "interleave op not supported by target.");
3445 return false;
3448 return true;
3452 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3453 type VECTYPE. */
3455 bool
3456 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3458 return vect_lanes_optab_supported_p ("vec_store_lanes",
3459 vec_store_lanes_optab,
3460 vectype, count);
3464 /* Function vect_permute_store_chain.
3466 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3467 a power of 2, generate interleave_high/low stmts to reorder the data
3468 correctly for the stores. Return the final references for stores in
3469 RESULT_CHAIN.
3471 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3472 The input is 4 vectors each containing 8 elements. We assign a number to
3473 each element, the input sequence is:
3475 1st vec: 0 1 2 3 4 5 6 7
3476 2nd vec: 8 9 10 11 12 13 14 15
3477 3rd vec: 16 17 18 19 20 21 22 23
3478 4th vec: 24 25 26 27 28 29 30 31
3480 The output sequence should be:
3482 1st vec: 0 8 16 24 1 9 17 25
3483 2nd vec: 2 10 18 26 3 11 19 27
3484 3rd vec: 4 12 20 28 5 13 21 30
3485 4th vec: 6 14 22 30 7 15 23 31
3487 i.e., we interleave the contents of the four vectors in their order.
3489 We use interleave_high/low instructions to create such output. The input of
3490 each interleave_high/low operation is two vectors:
3491 1st vec 2nd vec
3492 0 1 2 3 4 5 6 7
3493 the even elements of the result vector are obtained left-to-right from the
3494 high/low elements of the first vector. The odd elements of the result are
3495 obtained left-to-right from the high/low elements of the second vector.
3496 The output of interleave_high will be: 0 4 1 5
3497 and of interleave_low: 2 6 3 7
3500 The permutation is done in log LENGTH stages. In each stage interleave_high
3501 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3502 where the first argument is taken from the first half of DR_CHAIN and the
3503 second argument from it's second half.
3504 In our example,
3506 I1: interleave_high (1st vec, 3rd vec)
3507 I2: interleave_low (1st vec, 3rd vec)
3508 I3: interleave_high (2nd vec, 4th vec)
3509 I4: interleave_low (2nd vec, 4th vec)
3511 The output for the first stage is:
3513 I1: 0 16 1 17 2 18 3 19
3514 I2: 4 20 5 21 6 22 7 23
3515 I3: 8 24 9 25 10 26 11 27
3516 I4: 12 28 13 29 14 30 15 31
3518 The output of the second stage, i.e. the final result is:
3520 I1: 0 8 16 24 1 9 17 25
3521 I2: 2 10 18 26 3 11 19 27
3522 I3: 4 12 20 28 5 13 21 30
3523 I4: 6 14 22 30 7 15 23 31. */
3525 void
3526 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3527 unsigned int length,
3528 gimple stmt,
3529 gimple_stmt_iterator *gsi,
3530 VEC(tree,heap) **result_chain)
3532 tree perm_dest, vect1, vect2, high, low;
3533 gimple perm_stmt;
3534 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3535 int i;
3536 unsigned int j;
3537 enum tree_code high_code, low_code;
3539 gcc_assert (vect_strided_store_supported (vectype, length));
3541 *result_chain = VEC_copy (tree, heap, dr_chain);
3543 for (i = 0; i < exact_log2 (length); i++)
3545 for (j = 0; j < length/2; j++)
3547 vect1 = VEC_index (tree, dr_chain, j);
3548 vect2 = VEC_index (tree, dr_chain, j+length/2);
3550 /* Create interleaving stmt:
3551 in the case of big endian:
3552 high = interleave_high (vect1, vect2)
3553 and in the case of little endian:
3554 high = interleave_low (vect1, vect2). */
3555 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3556 DECL_GIMPLE_REG_P (perm_dest) = 1;
3557 add_referenced_var (perm_dest);
3558 if (BYTES_BIG_ENDIAN)
3560 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3561 low_code = VEC_INTERLEAVE_LOW_EXPR;
3563 else
3565 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3566 high_code = VEC_INTERLEAVE_LOW_EXPR;
3568 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3569 vect1, vect2);
3570 high = make_ssa_name (perm_dest, perm_stmt);
3571 gimple_assign_set_lhs (perm_stmt, high);
3572 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3573 VEC_replace (tree, *result_chain, 2*j, high);
3575 /* Create interleaving stmt:
3576 in the case of big endian:
3577 low = interleave_low (vect1, vect2)
3578 and in the case of little endian:
3579 low = interleave_high (vect1, vect2). */
3580 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3581 DECL_GIMPLE_REG_P (perm_dest) = 1;
3582 add_referenced_var (perm_dest);
3583 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3584 vect1, vect2);
3585 low = make_ssa_name (perm_dest, perm_stmt);
3586 gimple_assign_set_lhs (perm_stmt, low);
3587 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3588 VEC_replace (tree, *result_chain, 2*j+1, low);
3590 dr_chain = VEC_copy (tree, heap, *result_chain);
3594 /* Function vect_setup_realignment
3596 This function is called when vectorizing an unaligned load using
3597 the dr_explicit_realign[_optimized] scheme.
3598 This function generates the following code at the loop prolog:
3600 p = initial_addr;
3601 x msq_init = *(floor(p)); # prolog load
3602 realignment_token = call target_builtin;
3603 loop:
3604 x msq = phi (msq_init, ---)
3606 The stmts marked with x are generated only for the case of
3607 dr_explicit_realign_optimized.
3609 The code above sets up a new (vector) pointer, pointing to the first
3610 location accessed by STMT, and a "floor-aligned" load using that pointer.
3611 It also generates code to compute the "realignment-token" (if the relevant
3612 target hook was defined), and creates a phi-node at the loop-header bb
3613 whose arguments are the result of the prolog-load (created by this
3614 function) and the result of a load that takes place in the loop (to be
3615 created by the caller to this function).
3617 For the case of dr_explicit_realign_optimized:
3618 The caller to this function uses the phi-result (msq) to create the
3619 realignment code inside the loop, and sets up the missing phi argument,
3620 as follows:
3621 loop:
3622 msq = phi (msq_init, lsq)
3623 lsq = *(floor(p')); # load in loop
3624 result = realign_load (msq, lsq, realignment_token);
3626 For the case of dr_explicit_realign:
3627 loop:
3628 msq = *(floor(p)); # load in loop
3629 p' = p + (VS-1);
3630 lsq = *(floor(p')); # load in loop
3631 result = realign_load (msq, lsq, realignment_token);
3633 Input:
3634 STMT - (scalar) load stmt to be vectorized. This load accesses
3635 a memory location that may be unaligned.
3636 BSI - place where new code is to be inserted.
3637 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3638 is used.
3640 Output:
3641 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3642 target hook, if defined.
3643 Return value - the result of the loop-header phi node. */
3645 tree
3646 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3647 tree *realignment_token,
3648 enum dr_alignment_support alignment_support_scheme,
3649 tree init_addr,
3650 struct loop **at_loop)
3652 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3653 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3654 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3655 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3656 struct loop *loop = NULL;
3657 edge pe = NULL;
3658 tree scalar_dest = gimple_assign_lhs (stmt);
3659 tree vec_dest;
3660 gimple inc;
3661 tree ptr;
3662 tree data_ref;
3663 gimple new_stmt;
3664 basic_block new_bb;
3665 tree msq_init = NULL_TREE;
3666 tree new_temp;
3667 gimple phi_stmt;
3668 tree msq = NULL_TREE;
3669 gimple_seq stmts = NULL;
3670 bool inv_p;
3671 bool compute_in_loop = false;
3672 bool nested_in_vect_loop = false;
3673 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3674 struct loop *loop_for_initial_load = NULL;
3676 if (loop_vinfo)
3678 loop = LOOP_VINFO_LOOP (loop_vinfo);
3679 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3682 gcc_assert (alignment_support_scheme == dr_explicit_realign
3683 || alignment_support_scheme == dr_explicit_realign_optimized);
3685 /* We need to generate three things:
3686 1. the misalignment computation
3687 2. the extra vector load (for the optimized realignment scheme).
3688 3. the phi node for the two vectors from which the realignment is
3689 done (for the optimized realignment scheme). */
3691 /* 1. Determine where to generate the misalignment computation.
3693 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3694 calculation will be generated by this function, outside the loop (in the
3695 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3696 caller, inside the loop.
3698 Background: If the misalignment remains fixed throughout the iterations of
3699 the loop, then both realignment schemes are applicable, and also the
3700 misalignment computation can be done outside LOOP. This is because we are
3701 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3702 are a multiple of VS (the Vector Size), and therefore the misalignment in
3703 different vectorized LOOP iterations is always the same.
3704 The problem arises only if the memory access is in an inner-loop nested
3705 inside LOOP, which is now being vectorized using outer-loop vectorization.
3706 This is the only case when the misalignment of the memory access may not
3707 remain fixed throughout the iterations of the inner-loop (as explained in
3708 detail in vect_supportable_dr_alignment). In this case, not only is the
3709 optimized realignment scheme not applicable, but also the misalignment
3710 computation (and generation of the realignment token that is passed to
3711 REALIGN_LOAD) have to be done inside the loop.
3713 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3714 or not, which in turn determines if the misalignment is computed inside
3715 the inner-loop, or outside LOOP. */
3717 if (init_addr != NULL_TREE || !loop_vinfo)
3719 compute_in_loop = true;
3720 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3724 /* 2. Determine where to generate the extra vector load.
3726 For the optimized realignment scheme, instead of generating two vector
3727 loads in each iteration, we generate a single extra vector load in the
3728 preheader of the loop, and in each iteration reuse the result of the
3729 vector load from the previous iteration. In case the memory access is in
3730 an inner-loop nested inside LOOP, which is now being vectorized using
3731 outer-loop vectorization, we need to determine whether this initial vector
3732 load should be generated at the preheader of the inner-loop, or can be
3733 generated at the preheader of LOOP. If the memory access has no evolution
3734 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3735 to be generated inside LOOP (in the preheader of the inner-loop). */
3737 if (nested_in_vect_loop)
3739 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3740 bool invariant_in_outerloop =
3741 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3742 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3744 else
3745 loop_for_initial_load = loop;
3746 if (at_loop)
3747 *at_loop = loop_for_initial_load;
3749 if (loop_for_initial_load)
3750 pe = loop_preheader_edge (loop_for_initial_load);
3752 /* 3. For the case of the optimized realignment, create the first vector
3753 load at the loop preheader. */
3755 if (alignment_support_scheme == dr_explicit_realign_optimized)
3757 /* Create msq_init = *(floor(p1)) in the loop preheader */
3759 gcc_assert (!compute_in_loop);
3760 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3761 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
3762 NULL_TREE, &init_addr, NULL, &inc,
3763 true, &inv_p);
3764 new_stmt = gimple_build_assign_with_ops
3765 (BIT_AND_EXPR, NULL_TREE, ptr,
3766 build_int_cst (TREE_TYPE (ptr),
3767 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3768 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3769 gimple_assign_set_lhs (new_stmt, new_temp);
3770 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3771 gcc_assert (!new_bb);
3772 data_ref
3773 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3774 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3775 new_stmt = gimple_build_assign (vec_dest, data_ref);
3776 new_temp = make_ssa_name (vec_dest, new_stmt);
3777 gimple_assign_set_lhs (new_stmt, new_temp);
3778 mark_symbols_for_renaming (new_stmt);
3779 if (pe)
3781 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3782 gcc_assert (!new_bb);
3784 else
3785 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3787 msq_init = gimple_assign_lhs (new_stmt);
3790 /* 4. Create realignment token using a target builtin, if available.
3791 It is done either inside the containing loop, or before LOOP (as
3792 determined above). */
3794 if (targetm.vectorize.builtin_mask_for_load)
3796 tree builtin_decl;
3798 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
3799 if (!init_addr)
3801 /* Generate the INIT_ADDR computation outside LOOP. */
3802 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3803 NULL_TREE, loop);
3804 if (loop)
3806 pe = loop_preheader_edge (loop);
3807 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3808 gcc_assert (!new_bb);
3810 else
3811 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3814 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3815 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3816 vec_dest =
3817 vect_create_destination_var (scalar_dest,
3818 gimple_call_return_type (new_stmt));
3819 new_temp = make_ssa_name (vec_dest, new_stmt);
3820 gimple_call_set_lhs (new_stmt, new_temp);
3822 if (compute_in_loop)
3823 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3824 else
3826 /* Generate the misalignment computation outside LOOP. */
3827 pe = loop_preheader_edge (loop);
3828 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3829 gcc_assert (!new_bb);
3832 *realignment_token = gimple_call_lhs (new_stmt);
3834 /* The result of the CALL_EXPR to this builtin is determined from
3835 the value of the parameter and no global variables are touched
3836 which makes the builtin a "const" function. Requiring the
3837 builtin to have the "const" attribute makes it unnecessary
3838 to call mark_call_clobbered. */
3839 gcc_assert (TREE_READONLY (builtin_decl));
3842 if (alignment_support_scheme == dr_explicit_realign)
3843 return msq;
3845 gcc_assert (!compute_in_loop);
3846 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3849 /* 5. Create msq = phi <msq_init, lsq> in loop */
3851 pe = loop_preheader_edge (containing_loop);
3852 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3853 msq = make_ssa_name (vec_dest, NULL);
3854 phi_stmt = create_phi_node (msq, containing_loop->header);
3855 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3856 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3858 return msq;
3862 /* Function vect_strided_load_supported.
3864 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3865 and FALSE otherwise. */
3867 bool
3868 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
3870 optab perm_even_optab, perm_odd_optab;
3871 enum machine_mode mode;
3873 mode = TYPE_MODE (vectype);
3875 /* vect_permute_load_chain requires the group size to be a power of two. */
3876 if (exact_log2 (count) == -1)
3878 if (vect_print_dump_info (REPORT_DETAILS))
3879 fprintf (vect_dump, "the size of the group of strided accesses"
3880 " is not a power of 2");
3881 return false;
3884 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3885 optab_default);
3886 if (!perm_even_optab)
3888 if (vect_print_dump_info (REPORT_DETAILS))
3889 fprintf (vect_dump, "no optab for perm_even.");
3890 return false;
3893 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3895 if (vect_print_dump_info (REPORT_DETAILS))
3896 fprintf (vect_dump, "perm_even op not supported by target.");
3897 return false;
3900 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3901 optab_default);
3902 if (!perm_odd_optab)
3904 if (vect_print_dump_info (REPORT_DETAILS))
3905 fprintf (vect_dump, "no optab for perm_odd.");
3906 return false;
3909 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3911 if (vect_print_dump_info (REPORT_DETAILS))
3912 fprintf (vect_dump, "perm_odd op not supported by target.");
3913 return false;
3915 return true;
3918 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
3919 type VECTYPE. */
3921 bool
3922 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3924 return vect_lanes_optab_supported_p ("vec_load_lanes",
3925 vec_load_lanes_optab,
3926 vectype, count);
3929 /* Function vect_permute_load_chain.
3931 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3932 a power of 2, generate extract_even/odd stmts to reorder the input data
3933 correctly. Return the final references for loads in RESULT_CHAIN.
3935 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3936 The input is 4 vectors each containing 8 elements. We assign a number to each
3937 element, the input sequence is:
3939 1st vec: 0 1 2 3 4 5 6 7
3940 2nd vec: 8 9 10 11 12 13 14 15
3941 3rd vec: 16 17 18 19 20 21 22 23
3942 4th vec: 24 25 26 27 28 29 30 31
3944 The output sequence should be:
3946 1st vec: 0 4 8 12 16 20 24 28
3947 2nd vec: 1 5 9 13 17 21 25 29
3948 3rd vec: 2 6 10 14 18 22 26 30
3949 4th vec: 3 7 11 15 19 23 27 31
3951 i.e., the first output vector should contain the first elements of each
3952 interleaving group, etc.
3954 We use extract_even/odd instructions to create such output. The input of
3955 each extract_even/odd operation is two vectors
3956 1st vec 2nd vec
3957 0 1 2 3 4 5 6 7
3959 and the output is the vector of extracted even/odd elements. The output of
3960 extract_even will be: 0 2 4 6
3961 and of extract_odd: 1 3 5 7
3964 The permutation is done in log LENGTH stages. In each stage extract_even
3965 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3966 their order. In our example,
3968 E1: extract_even (1st vec, 2nd vec)
3969 E2: extract_odd (1st vec, 2nd vec)
3970 E3: extract_even (3rd vec, 4th vec)
3971 E4: extract_odd (3rd vec, 4th vec)
3973 The output for the first stage will be:
3975 E1: 0 2 4 6 8 10 12 14
3976 E2: 1 3 5 7 9 11 13 15
3977 E3: 16 18 20 22 24 26 28 30
3978 E4: 17 19 21 23 25 27 29 31
3980 In order to proceed and create the correct sequence for the next stage (or
3981 for the correct output, if the second stage is the last one, as in our
3982 example), we first put the output of extract_even operation and then the
3983 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3984 The input for the second stage is:
3986 1st vec (E1): 0 2 4 6 8 10 12 14
3987 2nd vec (E3): 16 18 20 22 24 26 28 30
3988 3rd vec (E2): 1 3 5 7 9 11 13 15
3989 4th vec (E4): 17 19 21 23 25 27 29 31
3991 The output of the second stage:
3993 E1: 0 4 8 12 16 20 24 28
3994 E2: 2 6 10 14 18 22 26 30
3995 E3: 1 5 9 13 17 21 25 29
3996 E4: 3 7 11 15 19 23 27 31
3998 And RESULT_CHAIN after reordering:
4000 1st vec (E1): 0 4 8 12 16 20 24 28
4001 2nd vec (E3): 1 5 9 13 17 21 25 29
4002 3rd vec (E2): 2 6 10 14 18 22 26 30
4003 4th vec (E4): 3 7 11 15 19 23 27 31. */
4005 static void
4006 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4007 unsigned int length,
4008 gimple stmt,
4009 gimple_stmt_iterator *gsi,
4010 VEC(tree,heap) **result_chain)
4012 tree perm_dest, data_ref, first_vect, second_vect;
4013 gimple perm_stmt;
4014 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4015 int i;
4016 unsigned int j;
4018 gcc_assert (vect_strided_load_supported (vectype, length));
4020 *result_chain = VEC_copy (tree, heap, dr_chain);
4021 for (i = 0; i < exact_log2 (length); i++)
4023 for (j = 0; j < length; j +=2)
4025 first_vect = VEC_index (tree, dr_chain, j);
4026 second_vect = VEC_index (tree, dr_chain, j+1);
4028 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4029 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4030 DECL_GIMPLE_REG_P (perm_dest) = 1;
4031 add_referenced_var (perm_dest);
4033 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
4034 perm_dest, first_vect,
4035 second_vect);
4037 data_ref = make_ssa_name (perm_dest, perm_stmt);
4038 gimple_assign_set_lhs (perm_stmt, data_ref);
4039 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4040 mark_symbols_for_renaming (perm_stmt);
4042 VEC_replace (tree, *result_chain, j/2, data_ref);
4044 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4045 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4046 DECL_GIMPLE_REG_P (perm_dest) = 1;
4047 add_referenced_var (perm_dest);
4049 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
4050 perm_dest, first_vect,
4051 second_vect);
4052 data_ref = make_ssa_name (perm_dest, perm_stmt);
4053 gimple_assign_set_lhs (perm_stmt, data_ref);
4054 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4055 mark_symbols_for_renaming (perm_stmt);
4057 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4059 dr_chain = VEC_copy (tree, heap, *result_chain);
4064 /* Function vect_transform_strided_load.
4066 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4067 to perform their permutation and ascribe the result vectorized statements to
4068 the scalar statements.
4071 void
4072 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4073 gimple_stmt_iterator *gsi)
4075 VEC(tree,heap) *result_chain = NULL;
4077 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4078 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4079 vectors, that are ready for vector computation. */
4080 result_chain = VEC_alloc (tree, heap, size);
4081 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4082 vect_record_strided_load_vectors (stmt, result_chain);
4083 VEC_free (tree, heap, result_chain);
4086 /* RESULT_CHAIN contains the output of a group of strided loads that were
4087 generated as part of the vectorization of STMT. Assign the statement
4088 for each vector to the associated scalar statement. */
4090 void
4091 vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4093 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4094 gimple next_stmt, new_stmt;
4095 unsigned int i, gap_count;
4096 tree tmp_data_ref;
4098 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4099 Since we scan the chain starting from it's first node, their order
4100 corresponds the order of data-refs in RESULT_CHAIN. */
4101 next_stmt = first_stmt;
4102 gap_count = 1;
4103 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4105 if (!next_stmt)
4106 break;
4108 /* Skip the gaps. Loads created for the gaps will be removed by dead
4109 code elimination pass later. No need to check for the first stmt in
4110 the group, since it always exists.
4111 GROUP_GAP is the number of steps in elements from the previous
4112 access (if there is no gap GROUP_GAP is 1). We skip loads that
4113 correspond to the gaps. */
4114 if (next_stmt != first_stmt
4115 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4117 gap_count++;
4118 continue;
4121 while (next_stmt)
4123 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4124 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4125 copies, and we put the new vector statement in the first available
4126 RELATED_STMT. */
4127 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4128 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4129 else
4131 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4133 gimple prev_stmt =
4134 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4135 gimple rel_stmt =
4136 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4137 while (rel_stmt)
4139 prev_stmt = rel_stmt;
4140 rel_stmt =
4141 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4144 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4145 new_stmt;
4149 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4150 gap_count = 1;
4151 /* If NEXT_STMT accesses the same DR as the previous statement,
4152 put the same TMP_DATA_REF as its vectorized statement; otherwise
4153 get the next data-ref from RESULT_CHAIN. */
4154 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4155 break;
4160 /* Function vect_force_dr_alignment_p.
4162 Returns whether the alignment of a DECL can be forced to be aligned
4163 on ALIGNMENT bit boundary. */
4165 bool
4166 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4168 if (TREE_CODE (decl) != VAR_DECL)
4169 return false;
4171 if (DECL_EXTERNAL (decl))
4172 return false;
4174 if (TREE_ASM_WRITTEN (decl))
4175 return false;
4177 if (TREE_STATIC (decl))
4178 return (alignment <= MAX_OFILE_ALIGNMENT);
4179 else
4180 return (alignment <= MAX_STACK_ALIGNMENT);
4184 /* Return whether the data reference DR is supported with respect to its
4185 alignment.
4186 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4187 it is aligned, i.e., check if it is possible to vectorize it with different
4188 alignment. */
4190 enum dr_alignment_support
4191 vect_supportable_dr_alignment (struct data_reference *dr,
4192 bool check_aligned_accesses)
4194 gimple stmt = DR_STMT (dr);
4195 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4196 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4197 enum machine_mode mode = TYPE_MODE (vectype);
4198 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4199 struct loop *vect_loop = NULL;
4200 bool nested_in_vect_loop = false;
4202 if (aligned_access_p (dr) && !check_aligned_accesses)
4203 return dr_aligned;
4205 if (loop_vinfo)
4207 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4208 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4211 /* Possibly unaligned access. */
4213 /* We can choose between using the implicit realignment scheme (generating
4214 a misaligned_move stmt) and the explicit realignment scheme (generating
4215 aligned loads with a REALIGN_LOAD). There are two variants to the
4216 explicit realignment scheme: optimized, and unoptimized.
4217 We can optimize the realignment only if the step between consecutive
4218 vector loads is equal to the vector size. Since the vector memory
4219 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4220 is guaranteed that the misalignment amount remains the same throughout the
4221 execution of the vectorized loop. Therefore, we can create the
4222 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4223 at the loop preheader.
4225 However, in the case of outer-loop vectorization, when vectorizing a
4226 memory access in the inner-loop nested within the LOOP that is now being
4227 vectorized, while it is guaranteed that the misalignment of the
4228 vectorized memory access will remain the same in different outer-loop
4229 iterations, it is *not* guaranteed that is will remain the same throughout
4230 the execution of the inner-loop. This is because the inner-loop advances
4231 with the original scalar step (and not in steps of VS). If the inner-loop
4232 step happens to be a multiple of VS, then the misalignment remains fixed
4233 and we can use the optimized realignment scheme. For example:
4235 for (i=0; i<N; i++)
4236 for (j=0; j<M; j++)
4237 s += a[i+j];
4239 When vectorizing the i-loop in the above example, the step between
4240 consecutive vector loads is 1, and so the misalignment does not remain
4241 fixed across the execution of the inner-loop, and the realignment cannot
4242 be optimized (as illustrated in the following pseudo vectorized loop):
4244 for (i=0; i<N; i+=4)
4245 for (j=0; j<M; j++){
4246 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4247 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4248 // (assuming that we start from an aligned address).
4251 We therefore have to use the unoptimized realignment scheme:
4253 for (i=0; i<N; i+=4)
4254 for (j=k; j<M; j+=4)
4255 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4256 // that the misalignment of the initial address is
4257 // 0).
4259 The loop can then be vectorized as follows:
4261 for (k=0; k<4; k++){
4262 rt = get_realignment_token (&vp[k]);
4263 for (i=0; i<N; i+=4){
4264 v1 = vp[i+k];
4265 for (j=k; j<M; j+=4){
4266 v2 = vp[i+j+VS-1];
4267 va = REALIGN_LOAD <v1,v2,rt>;
4268 vs += va;
4269 v1 = v2;
4272 } */
4274 if (DR_IS_READ (dr))
4276 bool is_packed = false;
4277 tree type = (TREE_TYPE (DR_REF (dr)));
4279 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4280 && (!targetm.vectorize.builtin_mask_for_load
4281 || targetm.vectorize.builtin_mask_for_load ()))
4283 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4284 if ((nested_in_vect_loop
4285 && (TREE_INT_CST_LOW (DR_STEP (dr))
4286 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4287 || !loop_vinfo)
4288 return dr_explicit_realign;
4289 else
4290 return dr_explicit_realign_optimized;
4292 if (!known_alignment_for_access_p (dr))
4294 tree ba = DR_BASE_OBJECT (dr);
4296 if (ba)
4297 is_packed = contains_packed_reference (ba);
4300 if (targetm.vectorize.
4301 support_vector_misalignment (mode, type,
4302 DR_MISALIGNMENT (dr), is_packed))
4303 /* Can't software pipeline the loads, but can at least do them. */
4304 return dr_unaligned_supported;
4306 else
4308 bool is_packed = false;
4309 tree type = (TREE_TYPE (DR_REF (dr)));
4311 if (!known_alignment_for_access_p (dr))
4313 tree ba = DR_BASE_OBJECT (dr);
4315 if (ba)
4316 is_packed = contains_packed_reference (ba);
4319 if (targetm.vectorize.
4320 support_vector_misalignment (mode, type,
4321 DR_MISALIGNMENT (dr), is_packed))
4322 return dr_unaligned_supported;
4325 /* Unsupported. */
4326 return dr_unaligned_unsupported;