PR rtl-optimization/49095
[official-gcc.git] / gcc / tree-vect-data-refs.c
blobee059977559f17b4a0c36d4921048ecd76923bcc
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
43 #include "expr.h"
44 #include "optabs.h"
46 /* Return true if load- or store-lanes optab OPTAB is implemented for
47 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
49 static bool
50 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
51 tree vectype, unsigned HOST_WIDE_INT count)
53 enum machine_mode mode, array_mode;
54 bool limit_p;
56 mode = TYPE_MODE (vectype);
57 limit_p = !targetm.array_mode_supported_p (mode, count);
58 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
59 MODE_INT, limit_p);
61 if (array_mode == BLKmode)
63 if (vect_print_dump_info (REPORT_DETAILS))
64 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65 GET_MODE_NAME (mode), count);
66 return false;
69 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
71 if (vect_print_dump_info (REPORT_DETAILS))
72 fprintf (vect_dump, "cannot use %s<%s><%s>",
73 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
74 return false;
77 if (vect_print_dump_info (REPORT_DETAILS))
78 fprintf (vect_dump, "can use %s<%s><%s>",
79 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
81 return true;
85 /* Return the smallest scalar part of STMT.
86 This is used to determine the vectype of the stmt. We generally set the
87 vectype according to the type of the result (lhs). For stmts whose
88 result-type is different than the type of the arguments (e.g., demotion,
89 promotion), vectype will be reset appropriately (later). Note that we have
90 to visit the smallest datatype in this function, because that determines the
91 VF. If the smallest datatype in the loop is present only as the rhs of a
92 promotion operation - we'd miss it.
93 Such a case, where a variable of this datatype does not appear in the lhs
94 anywhere in the loop, can only occur if it's an invariant: e.g.:
95 'int_x = (int) short_inv', which we'd expect to have been optimized away by
96 invariant motion. However, we cannot rely on invariant motion to always
97 take invariants out of the loop, and so in the case of promotion we also
98 have to check the rhs.
99 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
100 types. */
102 tree
103 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
104 HOST_WIDE_INT *rhs_size_unit)
106 tree scalar_type = gimple_expr_type (stmt);
107 HOST_WIDE_INT lhs, rhs;
109 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
111 if (is_gimple_assign (stmt)
112 && (gimple_assign_cast_p (stmt)
113 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
114 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
116 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
118 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
119 if (rhs < lhs)
120 scalar_type = rhs_type;
123 *lhs_size_unit = lhs;
124 *rhs_size_unit = rhs;
125 return scalar_type;
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
135 gimple next_stmt = first_stmt;
136 int result = 0;
138 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
139 return -1;
141 while (next_stmt && next_stmt != stmt)
143 result++;
144 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
147 if (next_stmt)
148 return result;
149 else
150 return -1;
154 /* Function vect_insert_into_interleaving_chain.
156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
158 static void
159 vect_insert_into_interleaving_chain (struct data_reference *dra,
160 struct data_reference *drb)
162 gimple prev, next;
163 tree next_init;
164 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
167 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
169 while (next)
171 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
174 /* Insert here. */
175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
177 return;
179 prev = next;
180 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
183 /* We got to the end of the list. Insert here. */
184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
189 /* Function vect_update_interleaving_chain.
191 For two data-refs DRA and DRB that are a part of a chain interleaved data
192 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
194 There are four possible cases:
195 1. New stmts - both DRA and DRB are not a part of any chain:
196 FIRST_DR = DRB
197 NEXT_DR (DRB) = DRA
198 2. DRB is a part of a chain and DRA is not:
199 no need to update FIRST_DR
200 no need to insert DRB
201 insert DRA according to init
202 3. DRA is a part of a chain and DRB is not:
203 if (init of FIRST_DR > init of DRB)
204 FIRST_DR = DRB
205 NEXT(FIRST_DR) = previous FIRST_DR
206 else
207 insert DRB according to its init
208 4. both DRA and DRB are in some interleaving chains:
209 choose the chain with the smallest init of FIRST_DR
210 insert the nodes of the second chain into the first one. */
212 static void
213 vect_update_interleaving_chain (struct data_reference *drb,
214 struct data_reference *dra)
216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218 tree next_init, init_dra_chain, init_drb_chain;
219 gimple first_a, first_b;
220 tree node_init;
221 gimple node, prev, next, first_stmt;
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
229 return;
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra, drb);
238 return;
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
246 old_first_stmt)));
247 gimple tmp;
249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp = old_first_stmt;
259 while (tmp)
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
265 else
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb, dra);
269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
271 return;
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277 if (first_a == first_b)
278 return;
279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
284 /* Insert the nodes of DRA chain into the DRB chain.
285 After inserting a node, continue from this node of the DRB chain (don't
286 start from the beginning. */
287 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289 first_stmt = first_b;
291 else
293 /* Insert the nodes of DRB chain into the DRA chain.
294 After inserting a node, continue from this node of the DRA chain (don't
295 start from the beginning. */
296 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298 first_stmt = first_a;
301 while (node)
303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
305 while (next)
307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 if (tree_int_cst_compare (next_init, node_init) > 0)
310 /* Insert here. */
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
313 prev = node;
314 break;
316 prev = next;
317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
319 if (!next)
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
324 prev = node;
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
331 /* Check dependence between DRA and DRB for basic block vectorization.
332 If the accesses share same bases and offsets, we can compare their initial
333 constant offsets to decide whether they differ or not. In case of a read-
334 write dependence we check that the load is before the store to ensure that
335 vectorization will not change the order of the accesses. */
337 static bool
338 vect_drs_dependent_in_basic_block (struct data_reference *dra,
339 struct data_reference *drb)
341 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
342 gimple earlier_stmt;
344 /* We only call this function for pairs of loads and stores, but we verify
345 it here. */
346 if (DR_IS_READ (dra) == DR_IS_READ (drb))
348 if (DR_IS_READ (dra))
349 return false;
350 else
351 return true;
354 /* Check that the data-refs have same bases and offsets. If not, we can't
355 determine if they are dependent. */
356 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357 || !dr_equal_offsets_p (dra, drb))
358 return true;
360 /* Check the types. */
361 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
364 if (type_size_a != type_size_b
365 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366 TREE_TYPE (DR_REF (drb))))
367 return true;
369 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
372 /* Two different locations - no dependence. */
373 if (init_a != init_b)
374 return false;
376 /* We have a read-write dependence. Check that the load is before the store.
377 When we vectorize basic blocks, vector load can be only before
378 corresponding scalar load, and vector store can be only after its
379 corresponding scalar store. So the order of the acceses is preserved in
380 case the load is before the store. */
381 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
383 return false;
385 return true;
389 /* Function vect_check_interleaving.
391 Check if DRA and DRB are a part of interleaving. In case they are, insert
392 DRA and DRB in an interleaving chain. */
394 static bool
395 vect_check_interleaving (struct data_reference *dra,
396 struct data_reference *drb)
398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
400 /* Check that the data-refs have same first location (except init) and they
401 are both either store or load (not load and store). */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403 || !dr_equal_offsets_p (dra, drb)
404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405 || DR_IS_READ (dra) != DR_IS_READ (drb))
406 return false;
408 /* Check:
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between
412 data-refs' inits. */
413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
416 if (type_size_a != type_size_b
417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419 TREE_TYPE (DR_REF (drb))))
420 return false;
422 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424 step = TREE_INT_CST_LOW (DR_STEP (dra));
426 if (init_a > init_b)
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size = (init_a - init_b) % type_size_a;
432 if (step && (init_a - init_b) > step)
433 return false;
435 if (diff_mod_size == 0)
437 vect_update_interleaving_chain (drb, dra);
438 if (vect_print_dump_info (REPORT_DR_DETAILS))
440 fprintf (vect_dump, "Detected interleaving ");
441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 fprintf (vect_dump, " and ");
443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
445 return true;
448 else
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size = (init_b - init_a) % type_size_a;
454 if (step && (init_b - init_a) > step)
455 return false;
457 if (diff_mod_size == 0)
459 vect_update_interleaving_chain (dra, drb);
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 fprintf (vect_dump, "Detected interleaving ");
463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
467 return true;
471 return false;
474 /* Check if data references pointed by DR_I and DR_J are same or
475 belong to same interleaving group. Return FALSE if drs are
476 different, otherwise return TRUE. */
478 static bool
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
481 gimple stmt_i = DR_STMT (dr_i);
482 gimple stmt_j = DR_STMT (dr_j);
484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
489 return true;
490 else
491 return false;
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
497 static bool
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
504 return true;
505 else
506 return false;
509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
510 tested at run-time. Return TRUE if DDR was successfully inserted.
511 Return false if versioning is not supported. */
513 static bool
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
519 return false;
521 if (vect_print_dump_info (REPORT_DR_DETAILS))
523 fprintf (vect_dump, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525 fprintf (vect_dump, " and ");
526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
529 if (optimize_loop_nest_for_size_p (loop))
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning not supported when optimizing for size.");
533 return false;
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
537 if (loop->inner)
539 if (vect_print_dump_info (REPORT_DR_DETAILS))
540 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
541 return false;
544 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
545 return true;
549 /* Function vect_analyze_data_ref_dependence.
551 Return TRUE if there (might) exist a dependence between a memory-reference
552 DRA and a memory-reference DRB. When versioning for alias may check a
553 dependence at run-time, return FALSE. Adjust *MAX_VF according to
554 the data dependence. */
556 static bool
557 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
558 loop_vec_info loop_vinfo, int *max_vf,
559 bool *data_dependence_in_bb)
561 unsigned int i;
562 struct loop *loop = NULL;
563 struct data_reference *dra = DDR_A (ddr);
564 struct data_reference *drb = DDR_B (ddr);
565 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
566 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
567 lambda_vector dist_v;
568 unsigned int loop_depth;
570 /* Don't bother to analyze statements marked as unvectorizable. */
571 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
572 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
573 return false;
575 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
577 /* Independent data accesses. */
578 vect_check_interleaving (dra, drb);
579 return false;
582 if (loop_vinfo)
583 loop = LOOP_VINFO_LOOP (loop_vinfo);
585 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
586 return false;
588 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
590 if (loop_vinfo)
592 if (vect_print_dump_info (REPORT_DR_DETAILS))
594 fprintf (vect_dump, "versioning for alias required: "
595 "can't determine dependence between ");
596 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
597 fprintf (vect_dump, " and ");
598 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
601 /* Add to list of ddrs that need to be tested at run-time. */
602 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
605 /* When vectorizing a basic block unknown depnedence can still mean
606 strided access. */
607 if (vect_check_interleaving (dra, drb))
608 return false;
610 if (vect_print_dump_info (REPORT_DR_DETAILS))
612 fprintf (vect_dump, "can't determine dependence between ");
613 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
614 fprintf (vect_dump, " and ");
615 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
618 /* We do not vectorize basic blocks with write-write dependencies. */
619 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
620 return true;
622 /* We deal with read-write dependencies in basic blocks later (by
623 verifying that all the loads in the basic block are before all the
624 stores). */
625 *data_dependence_in_bb = true;
626 return false;
629 /* Versioning for alias is not yet supported for basic block SLP, and
630 dependence distance is unapplicable, hence, in case of known data
631 dependence, basic block vectorization is impossible for now. */
632 if (!loop_vinfo)
634 if (dra != drb && vect_check_interleaving (dra, drb))
635 return false;
637 if (vect_print_dump_info (REPORT_DR_DETAILS))
639 fprintf (vect_dump, "determined dependence between ");
640 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
641 fprintf (vect_dump, " and ");
642 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
645 /* Do not vectorize basic blcoks with write-write dependences. */
646 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
647 return true;
649 /* Check if this dependence is allowed in basic block vectorization. */
650 return vect_drs_dependent_in_basic_block (dra, drb);
653 /* Loop-based vectorization and known data dependence. */
654 if (DDR_NUM_DIST_VECTS (ddr) == 0)
656 if (vect_print_dump_info (REPORT_DR_DETAILS))
658 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
659 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
660 fprintf (vect_dump, " and ");
661 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
663 /* Add to list of ddrs that need to be tested at run-time. */
664 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
667 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
668 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
670 int dist = dist_v[loop_depth];
672 if (vect_print_dump_info (REPORT_DR_DETAILS))
673 fprintf (vect_dump, "dependence distance = %d.", dist);
675 if (dist == 0)
677 if (vect_print_dump_info (REPORT_DR_DETAILS))
679 fprintf (vect_dump, "dependence distance == 0 between ");
680 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
681 fprintf (vect_dump, " and ");
682 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
685 /* For interleaving, mark that there is a read-write dependency if
686 necessary. We check before that one of the data-refs is store. */
687 if (DR_IS_READ (dra))
688 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
689 else
691 if (DR_IS_READ (drb))
692 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
695 continue;
698 if (dist > 0 && DDR_REVERSED_P (ddr))
700 /* If DDR_REVERSED_P the order of the data-refs in DDR was
701 reversed (to make distance vector positive), and the actual
702 distance is negative. */
703 if (vect_print_dump_info (REPORT_DR_DETAILS))
704 fprintf (vect_dump, "dependence distance negative.");
705 continue;
708 if (abs (dist) >= 2
709 && abs (dist) < *max_vf)
711 /* The dependence distance requires reduction of the maximal
712 vectorization factor. */
713 *max_vf = abs (dist);
714 if (vect_print_dump_info (REPORT_DR_DETAILS))
715 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
716 *max_vf);
719 if (abs (dist) >= *max_vf)
721 /* Dependence distance does not create dependence, as far as
722 vectorization is concerned, in this case. */
723 if (vect_print_dump_info (REPORT_DR_DETAILS))
724 fprintf (vect_dump, "dependence distance >= VF.");
725 continue;
728 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
730 fprintf (vect_dump, "not vectorized, possible dependence "
731 "between data-refs ");
732 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
733 fprintf (vect_dump, " and ");
734 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
737 return true;
740 return false;
743 /* Function vect_analyze_data_ref_dependences.
745 Examine all the data references in the loop, and make sure there do not
746 exist any data dependences between them. Set *MAX_VF according to
747 the maximum vectorization factor the data dependences allow. */
749 bool
750 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
751 bb_vec_info bb_vinfo, int *max_vf,
752 bool *data_dependence_in_bb)
754 unsigned int i;
755 VEC (ddr_p, heap) *ddrs = NULL;
756 struct data_dependence_relation *ddr;
758 if (vect_print_dump_info (REPORT_DETAILS))
759 fprintf (vect_dump, "=== vect_analyze_dependences ===");
761 if (loop_vinfo)
762 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
763 else
764 ddrs = BB_VINFO_DDRS (bb_vinfo);
766 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
767 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
768 data_dependence_in_bb))
769 return false;
771 return true;
775 /* Function vect_compute_data_ref_alignment
777 Compute the misalignment of the data reference DR.
779 Output:
780 1. If during the misalignment computation it is found that the data reference
781 cannot be vectorized then false is returned.
782 2. DR_MISALIGNMENT (DR) is defined.
784 FOR NOW: No analysis is actually performed. Misalignment is calculated
785 only for trivial cases. TODO. */
787 static bool
788 vect_compute_data_ref_alignment (struct data_reference *dr)
790 gimple stmt = DR_STMT (dr);
791 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
792 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
793 struct loop *loop = NULL;
794 tree ref = DR_REF (dr);
795 tree vectype;
796 tree base, base_addr;
797 bool base_aligned;
798 tree misalign;
799 tree aligned_to, alignment;
801 if (vect_print_dump_info (REPORT_DETAILS))
802 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
804 if (loop_vinfo)
805 loop = LOOP_VINFO_LOOP (loop_vinfo);
807 /* Initialize misalignment to unknown. */
808 SET_DR_MISALIGNMENT (dr, -1);
810 misalign = DR_INIT (dr);
811 aligned_to = DR_ALIGNED_TO (dr);
812 base_addr = DR_BASE_ADDRESS (dr);
813 vectype = STMT_VINFO_VECTYPE (stmt_info);
815 /* In case the dataref is in an inner-loop of the loop that is being
816 vectorized (LOOP), we use the base and misalignment information
817 relative to the outer-loop (LOOP). This is ok only if the misalignment
818 stays the same throughout the execution of the inner-loop, which is why
819 we have to check that the stride of the dataref in the inner-loop evenly
820 divides by the vector size. */
821 if (loop && nested_in_vect_loop_p (loop, stmt))
823 tree step = DR_STEP (dr);
824 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
826 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
828 if (vect_print_dump_info (REPORT_ALIGNMENT))
829 fprintf (vect_dump, "inner step divides the vector-size.");
830 misalign = STMT_VINFO_DR_INIT (stmt_info);
831 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
832 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
834 else
836 if (vect_print_dump_info (REPORT_ALIGNMENT))
837 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
838 misalign = NULL_TREE;
842 base = build_fold_indirect_ref (base_addr);
843 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
845 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
846 || !misalign)
848 if (vect_print_dump_info (REPORT_ALIGNMENT))
850 fprintf (vect_dump, "Unknown alignment for access: ");
851 print_generic_expr (vect_dump, base, TDF_SLIM);
853 return true;
856 if ((DECL_P (base)
857 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
858 alignment) >= 0)
859 || (TREE_CODE (base_addr) == SSA_NAME
860 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
861 TREE_TYPE (base_addr)))),
862 alignment) >= 0))
863 base_aligned = true;
864 else
865 base_aligned = false;
867 if (!base_aligned)
869 /* Do not change the alignment of global variables if
870 flag_section_anchors is enabled. */
871 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
872 || (TREE_STATIC (base) && flag_section_anchors))
874 if (vect_print_dump_info (REPORT_DETAILS))
876 fprintf (vect_dump, "can't force alignment of ref: ");
877 print_generic_expr (vect_dump, ref, TDF_SLIM);
879 return true;
882 /* Force the alignment of the decl.
883 NOTE: This is the only change to the code we make during
884 the analysis phase, before deciding to vectorize the loop. */
885 if (vect_print_dump_info (REPORT_DETAILS))
887 fprintf (vect_dump, "force alignment of ");
888 print_generic_expr (vect_dump, ref, TDF_SLIM);
891 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
892 DECL_USER_ALIGN (base) = 1;
895 /* At this point we assume that the base is aligned. */
896 gcc_assert (base_aligned
897 || (TREE_CODE (base) == VAR_DECL
898 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
900 /* If this is a backward running DR then first access in the larger
901 vectype actually is N-1 elements before the address in the DR.
902 Adjust misalign accordingly. */
903 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
905 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
906 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
907 otherwise we wouldn't be here. */
908 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
909 /* PLUS because DR_STEP was negative. */
910 misalign = size_binop (PLUS_EXPR, misalign, offset);
913 /* Modulo alignment. */
914 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
916 if (!host_integerp (misalign, 1))
918 /* Negative or overflowed misalignment value. */
919 if (vect_print_dump_info (REPORT_DETAILS))
920 fprintf (vect_dump, "unexpected misalign value");
921 return false;
924 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
926 if (vect_print_dump_info (REPORT_DETAILS))
928 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
929 print_generic_expr (vect_dump, ref, TDF_SLIM);
932 return true;
936 /* Function vect_compute_data_refs_alignment
938 Compute the misalignment of data references in the loop.
939 Return FALSE if a data reference is found that cannot be vectorized. */
941 static bool
942 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
943 bb_vec_info bb_vinfo)
945 VEC (data_reference_p, heap) *datarefs;
946 struct data_reference *dr;
947 unsigned int i;
949 if (loop_vinfo)
950 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
951 else
952 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
954 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
955 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
956 && !vect_compute_data_ref_alignment (dr))
958 if (bb_vinfo)
960 /* Mark unsupported statement as unvectorizable. */
961 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
962 continue;
964 else
965 return false;
968 return true;
972 /* Function vect_update_misalignment_for_peel
974 DR - the data reference whose misalignment is to be adjusted.
975 DR_PEEL - the data reference whose misalignment is being made
976 zero in the vector loop by the peel.
977 NPEEL - the number of iterations in the peel loop if the misalignment
978 of DR_PEEL is known at compile time. */
980 static void
981 vect_update_misalignment_for_peel (struct data_reference *dr,
982 struct data_reference *dr_peel, int npeel)
984 unsigned int i;
985 VEC(dr_p,heap) *same_align_drs;
986 struct data_reference *current_dr;
987 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
988 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
989 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
990 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
992 /* For interleaved data accesses the step in the loop must be multiplied by
993 the size of the interleaving group. */
994 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
995 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
996 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
997 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
999 /* It can be assumed that the data refs with the same alignment as dr_peel
1000 are aligned in the vector loop. */
1001 same_align_drs
1002 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1003 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1005 if (current_dr != dr)
1006 continue;
1007 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1008 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1009 SET_DR_MISALIGNMENT (dr, 0);
1010 return;
1013 if (known_alignment_for_access_p (dr)
1014 && known_alignment_for_access_p (dr_peel))
1016 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1017 int misal = DR_MISALIGNMENT (dr);
1018 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1019 misal += negative ? -npeel * dr_size : npeel * dr_size;
1020 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1021 SET_DR_MISALIGNMENT (dr, misal);
1022 return;
1025 if (vect_print_dump_info (REPORT_DETAILS))
1026 fprintf (vect_dump, "Setting misalignment to -1.");
1027 SET_DR_MISALIGNMENT (dr, -1);
1031 /* Function vect_verify_datarefs_alignment
1033 Return TRUE if all data references in the loop can be
1034 handled with respect to alignment. */
1036 bool
1037 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1039 VEC (data_reference_p, heap) *datarefs;
1040 struct data_reference *dr;
1041 enum dr_alignment_support supportable_dr_alignment;
1042 unsigned int i;
1044 if (loop_vinfo)
1045 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1046 else
1047 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1049 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1051 gimple stmt = DR_STMT (dr);
1052 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1054 /* For interleaving, only the alignment of the first access matters.
1055 Skip statements marked as not vectorizable. */
1056 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1057 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1058 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1059 continue;
1061 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1062 if (!supportable_dr_alignment)
1064 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1066 if (DR_IS_READ (dr))
1067 fprintf (vect_dump,
1068 "not vectorized: unsupported unaligned load.");
1069 else
1070 fprintf (vect_dump,
1071 "not vectorized: unsupported unaligned store.");
1073 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1075 return false;
1077 if (supportable_dr_alignment != dr_aligned
1078 && vect_print_dump_info (REPORT_ALIGNMENT))
1079 fprintf (vect_dump, "Vectorizing an unaligned access.");
1081 return true;
1085 /* Function vector_alignment_reachable_p
1087 Return true if vector alignment for DR is reachable by peeling
1088 a few loop iterations. Return false otherwise. */
1090 static bool
1091 vector_alignment_reachable_p (struct data_reference *dr)
1093 gimple stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1095 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1097 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1099 /* For interleaved access we peel only if number of iterations in
1100 the prolog loop ({VF - misalignment}), is a multiple of the
1101 number of the interleaved accesses. */
1102 int elem_size, mis_in_elements;
1103 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1105 /* FORNOW: handle only known alignment. */
1106 if (!known_alignment_for_access_p (dr))
1107 return false;
1109 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1110 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1112 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1113 return false;
1116 /* If misalignment is known at the compile time then allow peeling
1117 only if natural alignment is reachable through peeling. */
1118 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1120 HOST_WIDE_INT elmsize =
1121 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1122 if (vect_print_dump_info (REPORT_DETAILS))
1124 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1125 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1127 if (DR_MISALIGNMENT (dr) % elmsize)
1129 if (vect_print_dump_info (REPORT_DETAILS))
1130 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1131 return false;
1135 if (!known_alignment_for_access_p (dr))
1137 tree type = (TREE_TYPE (DR_REF (dr)));
1138 tree ba = DR_BASE_OBJECT (dr);
1139 bool is_packed = false;
1141 if (ba)
1142 is_packed = contains_packed_reference (ba);
1144 if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1145 is_packed = true;
1147 if (vect_print_dump_info (REPORT_DETAILS))
1148 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1149 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1150 return true;
1151 else
1152 return false;
1155 return true;
1159 /* Calculate the cost of the memory access represented by DR. */
1161 static void
1162 vect_get_data_access_cost (struct data_reference *dr,
1163 unsigned int *inside_cost,
1164 unsigned int *outside_cost)
1166 gimple stmt = DR_STMT (dr);
1167 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1168 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1169 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1170 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1171 int ncopies = vf / nunits;
1172 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1174 if (!supportable_dr_alignment)
1175 *inside_cost = VECT_MAX_COST;
1176 else
1178 if (DR_IS_READ (dr))
1179 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1180 else
1181 vect_get_store_cost (dr, ncopies, inside_cost);
1184 if (vect_print_dump_info (REPORT_COST))
1185 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1186 "outside_cost = %d.", *inside_cost, *outside_cost);
1190 static hashval_t
1191 vect_peeling_hash (const void *elem)
1193 const struct _vect_peel_info *peel_info;
1195 peel_info = (const struct _vect_peel_info *) elem;
1196 return (hashval_t) peel_info->npeel;
1200 static int
1201 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1203 const struct _vect_peel_info *a, *b;
1205 a = (const struct _vect_peel_info *) elem1;
1206 b = (const struct _vect_peel_info *) elem2;
1207 return (a->npeel == b->npeel);
1211 /* Insert DR into peeling hash table with NPEEL as key. */
1213 static void
1214 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1215 int npeel)
1217 struct _vect_peel_info elem, *slot;
1218 void **new_slot;
1219 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1221 elem.npeel = npeel;
1222 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1223 &elem);
1224 if (slot)
1225 slot->count++;
1226 else
1228 slot = XNEW (struct _vect_peel_info);
1229 slot->npeel = npeel;
1230 slot->dr = dr;
1231 slot->count = 1;
1232 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1233 INSERT);
1234 *new_slot = slot;
1237 if (!supportable_dr_alignment && !flag_vect_cost_model)
1238 slot->count += VECT_MAX_COST;
1242 /* Traverse peeling hash table to find peeling option that aligns maximum
1243 number of data accesses. */
1245 static int
1246 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1248 vect_peel_info elem = (vect_peel_info) *slot;
1249 vect_peel_extended_info max = (vect_peel_extended_info) data;
1251 if (elem->count > max->peel_info.count)
1253 max->peel_info.npeel = elem->npeel;
1254 max->peel_info.count = elem->count;
1255 max->peel_info.dr = elem->dr;
1258 return 1;
1262 /* Traverse peeling hash table and calculate cost for each peeling option.
1263 Find the one with the lowest cost. */
1265 static int
1266 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1268 vect_peel_info elem = (vect_peel_info) *slot;
1269 vect_peel_extended_info min = (vect_peel_extended_info) data;
1270 int save_misalignment, dummy;
1271 unsigned int inside_cost = 0, outside_cost = 0, i;
1272 gimple stmt = DR_STMT (elem->dr);
1273 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1274 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1275 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1276 struct data_reference *dr;
1278 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1280 stmt = DR_STMT (dr);
1281 stmt_info = vinfo_for_stmt (stmt);
1282 /* For interleaving, only the alignment of the first access
1283 matters. */
1284 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1285 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1286 continue;
1288 save_misalignment = DR_MISALIGNMENT (dr);
1289 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1290 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1291 SET_DR_MISALIGNMENT (dr, save_misalignment);
1294 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1295 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1297 if (inside_cost < min->inside_cost
1298 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1300 min->inside_cost = inside_cost;
1301 min->outside_cost = outside_cost;
1302 min->peel_info.dr = elem->dr;
1303 min->peel_info.npeel = elem->npeel;
1306 return 1;
1310 /* Choose best peeling option by traversing peeling hash table and either
1311 choosing an option with the lowest cost (if cost model is enabled) or the
1312 option that aligns as many accesses as possible. */
1314 static struct data_reference *
1315 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1316 unsigned int *npeel)
1318 struct _vect_peel_extended_info res;
1320 res.peel_info.dr = NULL;
1322 if (flag_vect_cost_model)
1324 res.inside_cost = INT_MAX;
1325 res.outside_cost = INT_MAX;
1326 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1327 vect_peeling_hash_get_lowest_cost, &res);
1329 else
1331 res.peel_info.count = 0;
1332 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1333 vect_peeling_hash_get_most_frequent, &res);
1336 *npeel = res.peel_info.npeel;
1337 return res.peel_info.dr;
1341 /* Function vect_enhance_data_refs_alignment
1343 This pass will use loop versioning and loop peeling in order to enhance
1344 the alignment of data references in the loop.
1346 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1347 original loop is to be vectorized. Any other loops that are created by
1348 the transformations performed in this pass - are not supposed to be
1349 vectorized. This restriction will be relaxed.
1351 This pass will require a cost model to guide it whether to apply peeling
1352 or versioning or a combination of the two. For example, the scheme that
1353 intel uses when given a loop with several memory accesses, is as follows:
1354 choose one memory access ('p') which alignment you want to force by doing
1355 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1356 other accesses are not necessarily aligned, or (2) use loop versioning to
1357 generate one loop in which all accesses are aligned, and another loop in
1358 which only 'p' is necessarily aligned.
1360 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1361 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1362 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1364 Devising a cost model is the most critical aspect of this work. It will
1365 guide us on which access to peel for, whether to use loop versioning, how
1366 many versions to create, etc. The cost model will probably consist of
1367 generic considerations as well as target specific considerations (on
1368 powerpc for example, misaligned stores are more painful than misaligned
1369 loads).
1371 Here are the general steps involved in alignment enhancements:
1373 -- original loop, before alignment analysis:
1374 for (i=0; i<N; i++){
1375 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1376 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1379 -- After vect_compute_data_refs_alignment:
1380 for (i=0; i<N; i++){
1381 x = q[i]; # DR_MISALIGNMENT(q) = 3
1382 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1385 -- Possibility 1: we do loop versioning:
1386 if (p is aligned) {
1387 for (i=0; i<N; i++){ # loop 1A
1388 x = q[i]; # DR_MISALIGNMENT(q) = 3
1389 p[i] = y; # DR_MISALIGNMENT(p) = 0
1392 else {
1393 for (i=0; i<N; i++){ # loop 1B
1394 x = q[i]; # DR_MISALIGNMENT(q) = 3
1395 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1399 -- Possibility 2: we do loop peeling:
1400 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1401 x = q[i];
1402 p[i] = y;
1404 for (i = 3; i < N; i++){ # loop 2A
1405 x = q[i]; # DR_MISALIGNMENT(q) = 0
1406 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1409 -- Possibility 3: combination of loop peeling and versioning:
1410 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1411 x = q[i];
1412 p[i] = y;
1414 if (p is aligned) {
1415 for (i = 3; i<N; i++){ # loop 3A
1416 x = q[i]; # DR_MISALIGNMENT(q) = 0
1417 p[i] = y; # DR_MISALIGNMENT(p) = 0
1420 else {
1421 for (i = 3; i<N; i++){ # loop 3B
1422 x = q[i]; # DR_MISALIGNMENT(q) = 0
1423 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1427 These loops are later passed to loop_transform to be vectorized. The
1428 vectorizer will use the alignment information to guide the transformation
1429 (whether to generate regular loads/stores, or with special handling for
1430 misalignment). */
1432 bool
1433 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1435 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1436 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1437 enum dr_alignment_support supportable_dr_alignment;
1438 struct data_reference *dr0 = NULL, *first_store = NULL;
1439 struct data_reference *dr;
1440 unsigned int i, j;
1441 bool do_peeling = false;
1442 bool do_versioning = false;
1443 bool stat;
1444 gimple stmt;
1445 stmt_vec_info stmt_info;
1446 int vect_versioning_for_alias_required;
1447 unsigned int npeel = 0;
1448 bool all_misalignments_unknown = true;
1449 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1450 unsigned possible_npeel_number = 1;
1451 tree vectype;
1452 unsigned int nelements, mis, same_align_drs_max = 0;
1454 if (vect_print_dump_info (REPORT_DETAILS))
1455 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1457 /* While cost model enhancements are expected in the future, the high level
1458 view of the code at this time is as follows:
1460 A) If there is a misaligned access then see if peeling to align
1461 this access can make all data references satisfy
1462 vect_supportable_dr_alignment. If so, update data structures
1463 as needed and return true.
1465 B) If peeling wasn't possible and there is a data reference with an
1466 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1467 then see if loop versioning checks can be used to make all data
1468 references satisfy vect_supportable_dr_alignment. If so, update
1469 data structures as needed and return true.
1471 C) If neither peeling nor versioning were successful then return false if
1472 any data reference does not satisfy vect_supportable_dr_alignment.
1474 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1476 Note, Possibility 3 above (which is peeling and versioning together) is not
1477 being done at this time. */
1479 /* (1) Peeling to force alignment. */
1481 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1482 Considerations:
1483 + How many accesses will become aligned due to the peeling
1484 - How many accesses will become unaligned due to the peeling,
1485 and the cost of misaligned accesses.
1486 - The cost of peeling (the extra runtime checks, the increase
1487 in code size). */
1489 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1491 stmt = DR_STMT (dr);
1492 stmt_info = vinfo_for_stmt (stmt);
1494 /* For interleaving, only the alignment of the first access
1495 matters. */
1496 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1497 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1498 continue;
1500 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1501 do_peeling = vector_alignment_reachable_p (dr);
1502 if (do_peeling)
1504 if (known_alignment_for_access_p (dr))
1506 unsigned int npeel_tmp;
1507 bool negative = tree_int_cst_compare (DR_STEP (dr),
1508 size_zero_node) < 0;
1510 /* Save info about DR in the hash table. */
1511 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1512 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1513 htab_create (1, vect_peeling_hash,
1514 vect_peeling_hash_eq, free);
1516 vectype = STMT_VINFO_VECTYPE (stmt_info);
1517 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1518 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1519 TREE_TYPE (DR_REF (dr))));
1520 npeel_tmp = (negative
1521 ? (mis - nelements) : (nelements - mis))
1522 & (nelements - 1);
1524 /* For multiple types, it is possible that the bigger type access
1525 will have more than one peeling option. E.g., a loop with two
1526 types: one of size (vector size / 4), and the other one of
1527 size (vector size / 8). Vectorization factor will 8. If both
1528 access are misaligned by 3, the first one needs one scalar
1529 iteration to be aligned, and the second one needs 5. But the
1530 the first one will be aligned also by peeling 5 scalar
1531 iterations, and in that case both accesses will be aligned.
1532 Hence, except for the immediate peeling amount, we also want
1533 to try to add full vector size, while we don't exceed
1534 vectorization factor.
1535 We do this automtically for cost model, since we calculate cost
1536 for every peeling option. */
1537 if (!flag_vect_cost_model)
1538 possible_npeel_number = vf /nelements;
1540 /* Handle the aligned case. We may decide to align some other
1541 access, making DR unaligned. */
1542 if (DR_MISALIGNMENT (dr) == 0)
1544 npeel_tmp = 0;
1545 if (!flag_vect_cost_model)
1546 possible_npeel_number++;
1549 for (j = 0; j < possible_npeel_number; j++)
1551 gcc_assert (npeel_tmp <= vf);
1552 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1553 npeel_tmp += nelements;
1556 all_misalignments_unknown = false;
1557 /* Data-ref that was chosen for the case that all the
1558 misalignments are unknown is not relevant anymore, since we
1559 have a data-ref with known alignment. */
1560 dr0 = NULL;
1562 else
1564 /* If we don't know all the misalignment values, we prefer
1565 peeling for data-ref that has maximum number of data-refs
1566 with the same alignment, unless the target prefers to align
1567 stores over load. */
1568 if (all_misalignments_unknown)
1570 if (same_align_drs_max < VEC_length (dr_p,
1571 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1572 || !dr0)
1574 same_align_drs_max = VEC_length (dr_p,
1575 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1576 dr0 = dr;
1579 if (!first_store && DR_IS_WRITE (dr))
1580 first_store = dr;
1583 /* If there are both known and unknown misaligned accesses in the
1584 loop, we choose peeling amount according to the known
1585 accesses. */
1588 if (!supportable_dr_alignment)
1590 dr0 = dr;
1591 if (!first_store && DR_IS_WRITE (dr))
1592 first_store = dr;
1596 else
1598 if (!aligned_access_p (dr))
1600 if (vect_print_dump_info (REPORT_DETAILS))
1601 fprintf (vect_dump, "vector alignment may not be reachable");
1603 break;
1608 vect_versioning_for_alias_required
1609 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1611 /* Temporarily, if versioning for alias is required, we disable peeling
1612 until we support peeling and versioning. Often peeling for alignment
1613 will require peeling for loop-bound, which in turn requires that we
1614 know how to adjust the loop ivs after the loop. */
1615 if (vect_versioning_for_alias_required
1616 || !vect_can_advance_ivs_p (loop_vinfo)
1617 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1618 do_peeling = false;
1620 if (do_peeling && all_misalignments_unknown
1621 && vect_supportable_dr_alignment (dr0, false))
1624 /* Check if the target requires to prefer stores over loads, i.e., if
1625 misaligned stores are more expensive than misaligned loads (taking
1626 drs with same alignment into account). */
1627 if (first_store && DR_IS_READ (dr0))
1629 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1630 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1631 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1632 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1634 vect_get_data_access_cost (dr0, &load_inside_cost,
1635 &load_outside_cost);
1636 vect_get_data_access_cost (first_store, &store_inside_cost,
1637 &store_outside_cost);
1639 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1640 aligning the load DR0). */
1641 load_inside_penalty = store_inside_cost;
1642 load_outside_penalty = store_outside_cost;
1643 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1644 (vinfo_for_stmt (DR_STMT (first_store))),
1645 i, dr);
1646 i++)
1647 if (DR_IS_READ (dr))
1649 load_inside_penalty += load_inside_cost;
1650 load_outside_penalty += load_outside_cost;
1652 else
1654 load_inside_penalty += store_inside_cost;
1655 load_outside_penalty += store_outside_cost;
1658 /* Calculate the penalty for leaving DR0 unaligned (by
1659 aligning the FIRST_STORE). */
1660 store_inside_penalty = load_inside_cost;
1661 store_outside_penalty = load_outside_cost;
1662 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1663 (vinfo_for_stmt (DR_STMT (dr0))),
1664 i, dr);
1665 i++)
1666 if (DR_IS_READ (dr))
1668 store_inside_penalty += load_inside_cost;
1669 store_outside_penalty += load_outside_cost;
1671 else
1673 store_inside_penalty += store_inside_cost;
1674 store_outside_penalty += store_outside_cost;
1677 if (load_inside_penalty > store_inside_penalty
1678 || (load_inside_penalty == store_inside_penalty
1679 && load_outside_penalty > store_outside_penalty))
1680 dr0 = first_store;
1683 /* In case there are only loads with different unknown misalignments, use
1684 peeling only if it may help to align other accesses in the loop. */
1685 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1686 (vinfo_for_stmt (DR_STMT (dr0))))
1687 && vect_supportable_dr_alignment (dr0, false)
1688 != dr_unaligned_supported)
1689 do_peeling = false;
1692 if (do_peeling && !dr0)
1694 /* Peeling is possible, but there is no data access that is not supported
1695 unless aligned. So we try to choose the best possible peeling. */
1697 /* We should get here only if there are drs with known misalignment. */
1698 gcc_assert (!all_misalignments_unknown);
1700 /* Choose the best peeling from the hash table. */
1701 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1702 if (!dr0 || !npeel)
1703 do_peeling = false;
1706 if (do_peeling)
1708 stmt = DR_STMT (dr0);
1709 stmt_info = vinfo_for_stmt (stmt);
1710 vectype = STMT_VINFO_VECTYPE (stmt_info);
1711 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1713 if (known_alignment_for_access_p (dr0))
1715 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1716 size_zero_node) < 0;
1717 if (!npeel)
1719 /* Since it's known at compile time, compute the number of
1720 iterations in the peeled loop (the peeling factor) for use in
1721 updating DR_MISALIGNMENT values. The peeling factor is the
1722 vectorization factor minus the misalignment as an element
1723 count. */
1724 mis = DR_MISALIGNMENT (dr0);
1725 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1726 npeel = ((negative ? mis - nelements : nelements - mis)
1727 & (nelements - 1));
1730 /* For interleaved data access every iteration accesses all the
1731 members of the group, therefore we divide the number of iterations
1732 by the group size. */
1733 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1734 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1735 npeel /= GROUP_SIZE (stmt_info);
1737 if (vect_print_dump_info (REPORT_DETAILS))
1738 fprintf (vect_dump, "Try peeling by %d", npeel);
1741 /* Ensure that all data refs can be vectorized after the peel. */
1742 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1744 int save_misalignment;
1746 if (dr == dr0)
1747 continue;
1749 stmt = DR_STMT (dr);
1750 stmt_info = vinfo_for_stmt (stmt);
1751 /* For interleaving, only the alignment of the first access
1752 matters. */
1753 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1754 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1755 continue;
1757 save_misalignment = DR_MISALIGNMENT (dr);
1758 vect_update_misalignment_for_peel (dr, dr0, npeel);
1759 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1760 SET_DR_MISALIGNMENT (dr, save_misalignment);
1762 if (!supportable_dr_alignment)
1764 do_peeling = false;
1765 break;
1769 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1771 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1772 if (!stat)
1773 do_peeling = false;
1774 else
1775 return stat;
1778 if (do_peeling)
1780 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1781 If the misalignment of DR_i is identical to that of dr0 then set
1782 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1783 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1784 by the peeling factor times the element size of DR_i (MOD the
1785 vectorization factor times the size). Otherwise, the
1786 misalignment of DR_i must be set to unknown. */
1787 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1788 if (dr != dr0)
1789 vect_update_misalignment_for_peel (dr, dr0, npeel);
1791 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1792 if (npeel)
1793 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1794 else
1795 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1796 SET_DR_MISALIGNMENT (dr0, 0);
1797 if (vect_print_dump_info (REPORT_ALIGNMENT))
1798 fprintf (vect_dump, "Alignment of access forced using peeling.");
1800 if (vect_print_dump_info (REPORT_DETAILS))
1801 fprintf (vect_dump, "Peeling for alignment will be applied.");
1803 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1804 gcc_assert (stat);
1805 return stat;
1810 /* (2) Versioning to force alignment. */
1812 /* Try versioning if:
1813 1) flag_tree_vect_loop_version is TRUE
1814 2) optimize loop for speed
1815 3) there is at least one unsupported misaligned data ref with an unknown
1816 misalignment, and
1817 4) all misaligned data refs with a known misalignment are supported, and
1818 5) the number of runtime alignment checks is within reason. */
1820 do_versioning =
1821 flag_tree_vect_loop_version
1822 && optimize_loop_nest_for_speed_p (loop)
1823 && (!loop->inner); /* FORNOW */
1825 if (do_versioning)
1827 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1829 stmt = DR_STMT (dr);
1830 stmt_info = vinfo_for_stmt (stmt);
1832 /* For interleaving, only the alignment of the first access
1833 matters. */
1834 if (aligned_access_p (dr)
1835 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1836 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1837 continue;
1839 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1841 if (!supportable_dr_alignment)
1843 gimple stmt;
1844 int mask;
1845 tree vectype;
1847 if (known_alignment_for_access_p (dr)
1848 || VEC_length (gimple,
1849 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1850 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1852 do_versioning = false;
1853 break;
1856 stmt = DR_STMT (dr);
1857 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1858 gcc_assert (vectype);
1860 /* The rightmost bits of an aligned address must be zeros.
1861 Construct the mask needed for this test. For example,
1862 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1863 mask must be 15 = 0xf. */
1864 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1866 /* FORNOW: use the same mask to test all potentially unaligned
1867 references in the loop. The vectorizer currently supports
1868 a single vector size, see the reference to
1869 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1870 vectorization factor is computed. */
1871 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1872 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1873 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1874 VEC_safe_push (gimple, heap,
1875 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1876 DR_STMT (dr));
1880 /* Versioning requires at least one misaligned data reference. */
1881 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1882 do_versioning = false;
1883 else if (!do_versioning)
1884 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1887 if (do_versioning)
1889 VEC(gimple,heap) *may_misalign_stmts
1890 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1891 gimple stmt;
1893 /* It can now be assumed that the data references in the statements
1894 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1895 of the loop being vectorized. */
1896 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1898 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1899 dr = STMT_VINFO_DATA_REF (stmt_info);
1900 SET_DR_MISALIGNMENT (dr, 0);
1901 if (vect_print_dump_info (REPORT_ALIGNMENT))
1902 fprintf (vect_dump, "Alignment of access forced using versioning.");
1905 if (vect_print_dump_info (REPORT_DETAILS))
1906 fprintf (vect_dump, "Versioning for alignment will be applied.");
1908 /* Peeling and versioning can't be done together at this time. */
1909 gcc_assert (! (do_peeling && do_versioning));
1911 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1912 gcc_assert (stat);
1913 return stat;
1916 /* This point is reached if neither peeling nor versioning is being done. */
1917 gcc_assert (! (do_peeling || do_versioning));
1919 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1920 return stat;
1924 /* Function vect_find_same_alignment_drs.
1926 Update group and alignment relations according to the chosen
1927 vectorization factor. */
1929 static void
1930 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1931 loop_vec_info loop_vinfo)
1933 unsigned int i;
1934 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1935 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1936 struct data_reference *dra = DDR_A (ddr);
1937 struct data_reference *drb = DDR_B (ddr);
1938 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1939 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1940 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1941 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1942 lambda_vector dist_v;
1943 unsigned int loop_depth;
1945 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1946 return;
1948 if (dra == drb)
1949 return;
1951 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1952 return;
1954 /* Loop-based vectorization and known data dependence. */
1955 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1956 return;
1958 /* Data-dependence analysis reports a distance vector of zero
1959 for data-references that overlap only in the first iteration
1960 but have different sign step (see PR45764).
1961 So as a sanity check require equal DR_STEP. */
1962 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1963 return;
1965 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1966 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1968 int dist = dist_v[loop_depth];
1970 if (vect_print_dump_info (REPORT_DR_DETAILS))
1971 fprintf (vect_dump, "dependence distance = %d.", dist);
1973 /* Same loop iteration. */
1974 if (dist == 0
1975 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1977 /* Two references with distance zero have the same alignment. */
1978 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1979 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1980 if (vect_print_dump_info (REPORT_ALIGNMENT))
1981 fprintf (vect_dump, "accesses have the same alignment.");
1982 if (vect_print_dump_info (REPORT_DR_DETAILS))
1984 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1985 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1986 fprintf (vect_dump, " and ");
1987 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1994 /* Function vect_analyze_data_refs_alignment
1996 Analyze the alignment of the data-references in the loop.
1997 Return FALSE if a data reference is found that cannot be vectorized. */
1999 bool
2000 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2001 bb_vec_info bb_vinfo)
2003 if (vect_print_dump_info (REPORT_DETAILS))
2004 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2006 /* Mark groups of data references with same alignment using
2007 data dependence information. */
2008 if (loop_vinfo)
2010 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2011 struct data_dependence_relation *ddr;
2012 unsigned int i;
2014 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2015 vect_find_same_alignment_drs (ddr, loop_vinfo);
2018 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2020 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2021 fprintf (vect_dump,
2022 "not vectorized: can't calculate alignment for data ref.");
2023 return false;
2026 return true;
2030 /* Analyze groups of strided accesses: check that DR belongs to a group of
2031 strided accesses of legal size, step, etc. Detect gaps, single element
2032 interleaving, and other special cases. Set strided access info.
2033 Collect groups of strided stores for further use in SLP analysis. */
2035 static bool
2036 vect_analyze_group_access (struct data_reference *dr)
2038 tree step = DR_STEP (dr);
2039 tree scalar_type = TREE_TYPE (DR_REF (dr));
2040 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2041 gimple stmt = DR_STMT (dr);
2042 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2043 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2044 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2045 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2046 HOST_WIDE_INT stride, last_accessed_element = 1;
2047 bool slp_impossible = false;
2049 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2050 interleaving group (including gaps). */
2051 stride = dr_step / type_size;
2053 /* Not consecutive access is possible only if it is a part of interleaving. */
2054 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2056 /* Check if it this DR is a part of interleaving, and is a single
2057 element of the group that is accessed in the loop. */
2059 /* Gaps are supported only for loads. STEP must be a multiple of the type
2060 size. The size of the group must be a power of 2. */
2061 if (DR_IS_READ (dr)
2062 && (dr_step % type_size) == 0
2063 && stride > 0
2064 && exact_log2 (stride) != -1)
2066 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2067 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2068 if (vect_print_dump_info (REPORT_DR_DETAILS))
2070 fprintf (vect_dump, "Detected single element interleaving ");
2071 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2072 fprintf (vect_dump, " step ");
2073 print_generic_expr (vect_dump, step, TDF_SLIM);
2076 if (loop_vinfo)
2078 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2080 if (vect_print_dump_info (REPORT_DETAILS))
2081 fprintf (vect_dump, "Data access with gaps requires scalar "
2082 "epilogue loop");
2085 return true;
2088 if (vect_print_dump_info (REPORT_DETAILS))
2090 fprintf (vect_dump, "not consecutive access ");
2091 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2094 if (bb_vinfo)
2096 /* Mark the statement as unvectorizable. */
2097 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2098 return true;
2101 return false;
2104 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2106 /* First stmt in the interleaving chain. Check the chain. */
2107 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2108 struct data_reference *data_ref = dr;
2109 unsigned int count = 1;
2110 tree next_step;
2111 tree prev_init = DR_INIT (data_ref);
2112 gimple prev = stmt;
2113 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2115 while (next)
2117 /* Skip same data-refs. In case that two or more stmts share
2118 data-ref (supported only for loads), we vectorize only the first
2119 stmt, and the rest get their vectorized loads from the first
2120 one. */
2121 if (!tree_int_cst_compare (DR_INIT (data_ref),
2122 DR_INIT (STMT_VINFO_DATA_REF (
2123 vinfo_for_stmt (next)))))
2125 if (DR_IS_WRITE (data_ref))
2127 if (vect_print_dump_info (REPORT_DETAILS))
2128 fprintf (vect_dump, "Two store stmts share the same dr.");
2129 return false;
2132 /* Check that there is no load-store dependencies for this loads
2133 to prevent a case of load-store-load to the same location. */
2134 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2135 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2137 if (vect_print_dump_info (REPORT_DETAILS))
2138 fprintf (vect_dump,
2139 "READ_WRITE dependence in interleaving.");
2140 return false;
2143 /* For load use the same data-ref load. */
2144 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2146 prev = next;
2147 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2148 continue;
2151 prev = next;
2153 /* Check that all the accesses have the same STEP. */
2154 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2155 if (tree_int_cst_compare (step, next_step))
2157 if (vect_print_dump_info (REPORT_DETAILS))
2158 fprintf (vect_dump, "not consecutive access in interleaving");
2159 return false;
2162 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2163 /* Check that the distance between two accesses is equal to the type
2164 size. Otherwise, we have gaps. */
2165 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2166 - TREE_INT_CST_LOW (prev_init)) / type_size;
2167 if (diff != 1)
2169 /* FORNOW: SLP of accesses with gaps is not supported. */
2170 slp_impossible = true;
2171 if (DR_IS_WRITE (data_ref))
2173 if (vect_print_dump_info (REPORT_DETAILS))
2174 fprintf (vect_dump, "interleaved store with gaps");
2175 return false;
2178 gaps += diff - 1;
2181 last_accessed_element += diff;
2183 /* Store the gap from the previous member of the group. If there is no
2184 gap in the access, GROUP_GAP is always 1. */
2185 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2187 prev_init = DR_INIT (data_ref);
2188 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2189 /* Count the number of data-refs in the chain. */
2190 count++;
2193 /* COUNT is the number of accesses found, we multiply it by the size of
2194 the type to get COUNT_IN_BYTES. */
2195 count_in_bytes = type_size * count;
2197 /* Check that the size of the interleaving (including gaps) is not
2198 greater than STEP. */
2199 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2201 if (vect_print_dump_info (REPORT_DETAILS))
2203 fprintf (vect_dump, "interleaving size is greater than step for ");
2204 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2206 return false;
2209 /* Check that the size of the interleaving is equal to STEP for stores,
2210 i.e., that there are no gaps. */
2211 if (dr_step && dr_step != count_in_bytes)
2213 if (DR_IS_READ (dr))
2215 slp_impossible = true;
2216 /* There is a gap after the last load in the group. This gap is a
2217 difference between the stride and the number of elements. When
2218 there is no gap, this difference should be 0. */
2219 GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2221 else
2223 if (vect_print_dump_info (REPORT_DETAILS))
2224 fprintf (vect_dump, "interleaved store with gaps");
2225 return false;
2229 /* Check that STEP is a multiple of type size. */
2230 if (dr_step && (dr_step % type_size) != 0)
2232 if (vect_print_dump_info (REPORT_DETAILS))
2234 fprintf (vect_dump, "step is not a multiple of type size: step ");
2235 print_generic_expr (vect_dump, step, TDF_SLIM);
2236 fprintf (vect_dump, " size ");
2237 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2238 TDF_SLIM);
2240 return false;
2243 if (stride == 0)
2244 stride = count;
2246 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2247 if (vect_print_dump_info (REPORT_DETAILS))
2248 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2250 /* SLP: create an SLP data structure for every interleaving group of
2251 stores for further analysis in vect_analyse_slp. */
2252 if (DR_IS_WRITE (dr) && !slp_impossible)
2254 if (loop_vinfo)
2255 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2256 stmt);
2257 if (bb_vinfo)
2258 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2259 stmt);
2262 /* There is a gap in the end of the group. */
2263 if (stride - last_accessed_element > 0 && loop_vinfo)
2265 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2266 if (vect_print_dump_info (REPORT_DETAILS))
2267 fprintf (vect_dump, "Data access with gaps requires scalar "
2268 "epilogue loop");
2272 return true;
2276 /* Analyze the access pattern of the data-reference DR.
2277 In case of non-consecutive accesses call vect_analyze_group_access() to
2278 analyze groups of strided accesses. */
2280 static bool
2281 vect_analyze_data_ref_access (struct data_reference *dr)
2283 tree step = DR_STEP (dr);
2284 tree scalar_type = TREE_TYPE (DR_REF (dr));
2285 gimple stmt = DR_STMT (dr);
2286 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2287 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2288 struct loop *loop = NULL;
2289 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2291 if (loop_vinfo)
2292 loop = LOOP_VINFO_LOOP (loop_vinfo);
2294 if (loop_vinfo && !step)
2296 if (vect_print_dump_info (REPORT_DETAILS))
2297 fprintf (vect_dump, "bad data-ref access in loop");
2298 return false;
2301 /* Don't allow invariant accesses in loops. */
2302 if (loop_vinfo && dr_step == 0)
2303 return false;
2305 if (loop && nested_in_vect_loop_p (loop, stmt))
2307 /* Interleaved accesses are not yet supported within outer-loop
2308 vectorization for references in the inner-loop. */
2309 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2311 /* For the rest of the analysis we use the outer-loop step. */
2312 step = STMT_VINFO_DR_STEP (stmt_info);
2313 dr_step = TREE_INT_CST_LOW (step);
2315 if (dr_step == 0)
2317 if (vect_print_dump_info (REPORT_ALIGNMENT))
2318 fprintf (vect_dump, "zero step in outer loop.");
2319 if (DR_IS_READ (dr))
2320 return true;
2321 else
2322 return false;
2326 /* Consecutive? */
2327 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2328 || (dr_step < 0
2329 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2331 /* Mark that it is not interleaving. */
2332 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2333 return true;
2336 if (loop && nested_in_vect_loop_p (loop, stmt))
2338 if (vect_print_dump_info (REPORT_ALIGNMENT))
2339 fprintf (vect_dump, "strided access in outer loop.");
2340 return false;
2343 /* Not consecutive access - check if it's a part of interleaving group. */
2344 return vect_analyze_group_access (dr);
2348 /* Function vect_analyze_data_ref_accesses.
2350 Analyze the access pattern of all the data references in the loop.
2352 FORNOW: the only access pattern that is considered vectorizable is a
2353 simple step 1 (consecutive) access.
2355 FORNOW: handle only arrays and pointer accesses. */
2357 bool
2358 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2360 unsigned int i;
2361 VEC (data_reference_p, heap) *datarefs;
2362 struct data_reference *dr;
2364 if (vect_print_dump_info (REPORT_DETAILS))
2365 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2367 if (loop_vinfo)
2368 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2369 else
2370 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2372 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2373 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2374 && !vect_analyze_data_ref_access (dr))
2376 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2377 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2379 if (bb_vinfo)
2381 /* Mark the statement as not vectorizable. */
2382 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2383 continue;
2385 else
2386 return false;
2389 return true;
2392 /* Function vect_prune_runtime_alias_test_list.
2394 Prune a list of ddrs to be tested at run-time by versioning for alias.
2395 Return FALSE if resulting list of ddrs is longer then allowed by
2396 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2398 bool
2399 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2401 VEC (ddr_p, heap) * ddrs =
2402 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2403 unsigned i, j;
2405 if (vect_print_dump_info (REPORT_DETAILS))
2406 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2408 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2410 bool found;
2411 ddr_p ddr_i;
2413 ddr_i = VEC_index (ddr_p, ddrs, i);
2414 found = false;
2416 for (j = 0; j < i; j++)
2418 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2420 if (vect_vfa_range_equal (ddr_i, ddr_j))
2422 if (vect_print_dump_info (REPORT_DR_DETAILS))
2424 fprintf (vect_dump, "found equal ranges ");
2425 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2426 fprintf (vect_dump, ", ");
2427 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2428 fprintf (vect_dump, " and ");
2429 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2430 fprintf (vect_dump, ", ");
2431 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2433 found = true;
2434 break;
2438 if (found)
2440 VEC_ordered_remove (ddr_p, ddrs, i);
2441 continue;
2443 i++;
2446 if (VEC_length (ddr_p, ddrs) >
2447 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2449 if (vect_print_dump_info (REPORT_DR_DETAILS))
2451 fprintf (vect_dump,
2452 "disable versioning for alias - max number of generated "
2453 "checks exceeded.");
2456 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2458 return false;
2461 return true;
2465 /* Function vect_analyze_data_refs.
2467 Find all the data references in the loop or basic block.
2469 The general structure of the analysis of data refs in the vectorizer is as
2470 follows:
2471 1- vect_analyze_data_refs(loop/bb): call
2472 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2473 in the loop/bb and their dependences.
2474 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2475 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2476 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2480 bool
2481 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2482 bb_vec_info bb_vinfo,
2483 int *min_vf)
2485 struct loop *loop = NULL;
2486 basic_block bb = NULL;
2487 unsigned int i;
2488 VEC (data_reference_p, heap) *datarefs;
2489 struct data_reference *dr;
2490 tree scalar_type;
2491 bool res;
2493 if (vect_print_dump_info (REPORT_DETAILS))
2494 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2496 if (loop_vinfo)
2498 loop = LOOP_VINFO_LOOP (loop_vinfo);
2499 res = compute_data_dependences_for_loop
2500 (loop, true,
2501 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2502 &LOOP_VINFO_DATAREFS (loop_vinfo),
2503 &LOOP_VINFO_DDRS (loop_vinfo));
2505 if (!res)
2507 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2508 fprintf (vect_dump, "not vectorized: loop contains function calls"
2509 " or data references that cannot be analyzed");
2510 return false;
2513 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2515 else
2517 bb = BB_VINFO_BB (bb_vinfo);
2518 res = compute_data_dependences_for_bb (bb, true,
2519 &BB_VINFO_DATAREFS (bb_vinfo),
2520 &BB_VINFO_DDRS (bb_vinfo));
2521 if (!res)
2523 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2524 fprintf (vect_dump, "not vectorized: basic block contains function"
2525 " calls or data references that cannot be analyzed");
2526 return false;
2529 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2532 /* Go through the data-refs, check that the analysis succeeded. Update
2533 pointer from stmt_vec_info struct to DR and vectype. */
2535 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2537 gimple stmt;
2538 stmt_vec_info stmt_info;
2539 tree base, offset, init;
2540 int vf;
2542 if (!dr || !DR_REF (dr))
2544 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2545 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2546 return false;
2549 stmt = DR_STMT (dr);
2550 stmt_info = vinfo_for_stmt (stmt);
2552 /* Check that analysis of the data-ref succeeded. */
2553 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2554 || !DR_STEP (dr))
2556 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2558 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2559 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2562 if (bb_vinfo)
2564 /* Mark the statement as not vectorizable. */
2565 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2566 continue;
2568 else
2569 return false;
2572 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2574 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2575 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2576 "constant");
2577 if (bb_vinfo)
2579 /* Mark the statement as not vectorizable. */
2580 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2581 continue;
2583 else
2584 return false;
2587 base = unshare_expr (DR_BASE_ADDRESS (dr));
2588 offset = unshare_expr (DR_OFFSET (dr));
2589 init = unshare_expr (DR_INIT (dr));
2591 if (stmt_can_throw_internal (stmt))
2593 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2595 fprintf (vect_dump, "not vectorized: statement can throw an "
2596 "exception ");
2597 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2599 return false;
2602 /* Update DR field in stmt_vec_info struct. */
2604 /* If the dataref is in an inner-loop of the loop that is considered for
2605 for vectorization, we also want to analyze the access relative to
2606 the outer-loop (DR contains information only relative to the
2607 inner-most enclosing loop). We do that by building a reference to the
2608 first location accessed by the inner-loop, and analyze it relative to
2609 the outer-loop. */
2610 if (loop && nested_in_vect_loop_p (loop, stmt))
2612 tree outer_step, outer_base, outer_init;
2613 HOST_WIDE_INT pbitsize, pbitpos;
2614 tree poffset;
2615 enum machine_mode pmode;
2616 int punsignedp, pvolatilep;
2617 affine_iv base_iv, offset_iv;
2618 tree dinit;
2620 /* Build a reference to the first location accessed by the
2621 inner-loop: *(BASE+INIT). (The first location is actually
2622 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2623 tree inner_base = build_fold_indirect_ref
2624 (fold_build2 (POINTER_PLUS_EXPR,
2625 TREE_TYPE (base), base,
2626 fold_convert (sizetype, init)));
2628 if (vect_print_dump_info (REPORT_DETAILS))
2630 fprintf (vect_dump, "analyze in outer-loop: ");
2631 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2634 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2635 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2636 gcc_assert (outer_base != NULL_TREE);
2638 if (pbitpos % BITS_PER_UNIT != 0)
2640 if (vect_print_dump_info (REPORT_DETAILS))
2641 fprintf (vect_dump, "failed: bit offset alignment.\n");
2642 return false;
2645 outer_base = build_fold_addr_expr (outer_base);
2646 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2647 &base_iv, false))
2649 if (vect_print_dump_info (REPORT_DETAILS))
2650 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2651 return false;
2654 if (offset)
2656 if (poffset)
2657 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2658 poffset);
2659 else
2660 poffset = offset;
2663 if (!poffset)
2665 offset_iv.base = ssize_int (0);
2666 offset_iv.step = ssize_int (0);
2668 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2669 &offset_iv, false))
2671 if (vect_print_dump_info (REPORT_DETAILS))
2672 fprintf (vect_dump, "evolution of offset is not affine.\n");
2673 return false;
2676 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2677 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2678 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2679 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2680 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2682 outer_step = size_binop (PLUS_EXPR,
2683 fold_convert (ssizetype, base_iv.step),
2684 fold_convert (ssizetype, offset_iv.step));
2686 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2687 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2688 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2689 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2690 STMT_VINFO_DR_OFFSET (stmt_info) =
2691 fold_convert (ssizetype, offset_iv.base);
2692 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2693 size_int (highest_pow2_factor (offset_iv.base));
2695 if (vect_print_dump_info (REPORT_DETAILS))
2697 fprintf (vect_dump, "\touter base_address: ");
2698 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2699 fprintf (vect_dump, "\n\touter offset from base address: ");
2700 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2701 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2702 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2703 fprintf (vect_dump, "\n\touter step: ");
2704 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2705 fprintf (vect_dump, "\n\touter aligned to: ");
2706 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2710 if (STMT_VINFO_DATA_REF (stmt_info))
2712 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2714 fprintf (vect_dump,
2715 "not vectorized: more than one data ref in stmt: ");
2716 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2718 return false;
2721 STMT_VINFO_DATA_REF (stmt_info) = dr;
2723 /* Set vectype for STMT. */
2724 scalar_type = TREE_TYPE (DR_REF (dr));
2725 STMT_VINFO_VECTYPE (stmt_info) =
2726 get_vectype_for_scalar_type (scalar_type);
2727 if (!STMT_VINFO_VECTYPE (stmt_info))
2729 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2731 fprintf (vect_dump,
2732 "not vectorized: no vectype for stmt: ");
2733 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2734 fprintf (vect_dump, " scalar_type: ");
2735 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2738 if (bb_vinfo)
2740 /* Mark the statement as not vectorizable. */
2741 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2742 continue;
2744 else
2745 return false;
2748 /* Adjust the minimal vectorization factor according to the
2749 vector type. */
2750 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2751 if (vf > *min_vf)
2752 *min_vf = vf;
2755 return true;
2759 /* Function vect_get_new_vect_var.
2761 Returns a name for a new variable. The current naming scheme appends the
2762 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
2763 the name of vectorizer generated variables, and appends that to NAME if
2764 provided. */
2766 tree
2767 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2769 const char *prefix;
2770 tree new_vect_var;
2772 switch (var_kind)
2774 case vect_simple_var:
2775 prefix = "vect_";
2776 break;
2777 case vect_scalar_var:
2778 prefix = "stmp_";
2779 break;
2780 case vect_pointer_var:
2781 prefix = "vect_p";
2782 break;
2783 default:
2784 gcc_unreachable ();
2787 if (name)
2789 char* tmp = concat (prefix, name, NULL);
2790 new_vect_var = create_tmp_var (type, tmp);
2791 free (tmp);
2793 else
2794 new_vect_var = create_tmp_var (type, prefix);
2796 /* Mark vector typed variable as a gimple register variable. */
2797 if (TREE_CODE (type) == VECTOR_TYPE)
2798 DECL_GIMPLE_REG_P (new_vect_var) = true;
2800 return new_vect_var;
2804 /* Function vect_create_addr_base_for_vector_ref.
2806 Create an expression that computes the address of the first memory location
2807 that will be accessed for a data reference.
2809 Input:
2810 STMT: The statement containing the data reference.
2811 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2812 OFFSET: Optional. If supplied, it is be added to the initial address.
2813 LOOP: Specify relative to which loop-nest should the address be computed.
2814 For example, when the dataref is in an inner-loop nested in an
2815 outer-loop that is now being vectorized, LOOP can be either the
2816 outer-loop, or the inner-loop. The first memory location accessed
2817 by the following dataref ('in' points to short):
2819 for (i=0; i<N; i++)
2820 for (j=0; j<M; j++)
2821 s += in[i+j]
2823 is as follows:
2824 if LOOP=i_loop: &in (relative to i_loop)
2825 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2827 Output:
2828 1. Return an SSA_NAME whose value is the address of the memory location of
2829 the first vector of the data reference.
2830 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2831 these statement(s) which define the returned SSA_NAME.
2833 FORNOW: We are only handling array accesses with step 1. */
2835 tree
2836 vect_create_addr_base_for_vector_ref (gimple stmt,
2837 gimple_seq *new_stmt_list,
2838 tree offset,
2839 struct loop *loop)
2841 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2842 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2843 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2844 tree base_name;
2845 tree data_ref_base_var;
2846 tree vec_stmt;
2847 tree addr_base, addr_expr;
2848 tree dest;
2849 gimple_seq seq = NULL;
2850 tree base_offset = unshare_expr (DR_OFFSET (dr));
2851 tree init = unshare_expr (DR_INIT (dr));
2852 tree vect_ptr_type;
2853 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2854 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2855 tree base;
2857 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2859 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2861 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2863 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2864 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2865 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2868 if (loop_vinfo)
2869 base_name = build_fold_indirect_ref (data_ref_base);
2870 else
2872 base_offset = ssize_int (0);
2873 init = ssize_int (0);
2874 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2877 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2878 add_referenced_var (data_ref_base_var);
2879 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2880 data_ref_base_var);
2881 gimple_seq_add_seq (new_stmt_list, seq);
2883 /* Create base_offset */
2884 base_offset = size_binop (PLUS_EXPR,
2885 fold_convert (sizetype, base_offset),
2886 fold_convert (sizetype, init));
2887 dest = create_tmp_var (sizetype, "base_off");
2888 add_referenced_var (dest);
2889 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2890 gimple_seq_add_seq (new_stmt_list, seq);
2892 if (offset)
2894 tree tmp = create_tmp_var (sizetype, "offset");
2896 add_referenced_var (tmp);
2897 offset = fold_build2 (MULT_EXPR, sizetype,
2898 fold_convert (sizetype, offset), step);
2899 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2900 base_offset, offset);
2901 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2902 gimple_seq_add_seq (new_stmt_list, seq);
2905 /* base + base_offset */
2906 if (loop_vinfo)
2907 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2908 data_ref_base, base_offset);
2909 else
2911 addr_base = build1 (ADDR_EXPR,
2912 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2913 unshare_expr (DR_REF (dr)));
2916 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2917 base = get_base_address (DR_REF (dr));
2918 if (base
2919 && TREE_CODE (base) == MEM_REF)
2920 vect_ptr_type
2921 = build_qualified_type (vect_ptr_type,
2922 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2924 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2925 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2926 get_name (base_name));
2927 add_referenced_var (addr_expr);
2928 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2929 gimple_seq_add_seq (new_stmt_list, seq);
2931 if (DR_PTR_INFO (dr)
2932 && TREE_CODE (vec_stmt) == SSA_NAME)
2934 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2935 if (offset)
2937 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2938 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2942 if (vect_print_dump_info (REPORT_DETAILS))
2944 fprintf (vect_dump, "created ");
2945 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2948 return vec_stmt;
2952 /* Function vect_create_data_ref_ptr.
2954 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
2955 location accessed in the loop by STMT, along with the def-use update
2956 chain to appropriately advance the pointer through the loop iterations.
2957 Also set aliasing information for the pointer. This pointer is used by
2958 the callers to this function to create a memory reference expression for
2959 vector load/store access.
2961 Input:
2962 1. STMT: a stmt that references memory. Expected to be of the form
2963 GIMPLE_ASSIGN <name, data-ref> or
2964 GIMPLE_ASSIGN <data-ref, name>.
2965 2. AGGR_TYPE: the type of the reference, which should be either a vector
2966 or an array.
2967 3. AT_LOOP: the loop where the vector memref is to be created.
2968 4. OFFSET (optional): an offset to be added to the initial address accessed
2969 by the data-ref in STMT.
2970 5. BSI: location where the new stmts are to be placed if there is no loop
2971 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
2972 pointing to the initial address.
2974 Output:
2975 1. Declare a new ptr to vector_type, and have it point to the base of the
2976 data reference (initial addressed accessed by the data reference).
2977 For example, for vector of type V8HI, the following code is generated:
2979 v8hi *ap;
2980 ap = (v8hi *)initial_address;
2982 if OFFSET is not supplied:
2983 initial_address = &a[init];
2984 if OFFSET is supplied:
2985 initial_address = &a[init + OFFSET];
2987 Return the initial_address in INITIAL_ADDRESS.
2989 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2990 update the pointer in each iteration of the loop.
2992 Return the increment stmt that updates the pointer in PTR_INCR.
2994 3. Set INV_P to true if the access pattern of the data reference in the
2995 vectorized loop is invariant. Set it to false otherwise.
2997 4. Return the pointer. */
2999 tree
3000 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3001 tree offset, tree *initial_address,
3002 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3003 bool only_init, bool *inv_p)
3005 tree base_name;
3006 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3007 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3008 struct loop *loop = NULL;
3009 bool nested_in_vect_loop = false;
3010 struct loop *containing_loop = NULL;
3011 tree aggr_ptr_type;
3012 tree aggr_ptr;
3013 tree new_temp;
3014 gimple vec_stmt;
3015 gimple_seq new_stmt_list = NULL;
3016 edge pe = NULL;
3017 basic_block new_bb;
3018 tree aggr_ptr_init;
3019 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3020 tree aptr;
3021 gimple_stmt_iterator incr_gsi;
3022 bool insert_after;
3023 bool negative;
3024 tree indx_before_incr, indx_after_incr;
3025 gimple incr;
3026 tree step;
3027 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3028 tree base;
3030 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3031 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3033 if (loop_vinfo)
3035 loop = LOOP_VINFO_LOOP (loop_vinfo);
3036 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3037 containing_loop = (gimple_bb (stmt))->loop_father;
3038 pe = loop_preheader_edge (loop);
3040 else
3042 gcc_assert (bb_vinfo);
3043 only_init = true;
3044 *ptr_incr = NULL;
3047 /* Check the step (evolution) of the load in LOOP, and record
3048 whether it's invariant. */
3049 if (nested_in_vect_loop)
3050 step = STMT_VINFO_DR_STEP (stmt_info);
3051 else
3052 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3054 if (tree_int_cst_compare (step, size_zero_node) == 0)
3055 *inv_p = true;
3056 else
3057 *inv_p = false;
3058 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3060 /* Create an expression for the first address accessed by this load
3061 in LOOP. */
3062 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3064 if (vect_print_dump_info (REPORT_DETAILS))
3066 tree data_ref_base = base_name;
3067 fprintf (vect_dump, "create %s-pointer variable to type: ",
3068 tree_code_name[(int) TREE_CODE (aggr_type)]);
3069 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3070 if (TREE_CODE (data_ref_base) == VAR_DECL
3071 || TREE_CODE (data_ref_base) == ARRAY_REF)
3072 fprintf (vect_dump, " vectorizing an array ref: ");
3073 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3074 fprintf (vect_dump, " vectorizing a record based array ref: ");
3075 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3076 fprintf (vect_dump, " vectorizing a pointer ref: ");
3077 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3080 /* (1) Create the new aggregate-pointer variable. */
3081 aggr_ptr_type = build_pointer_type (aggr_type);
3082 base = get_base_address (DR_REF (dr));
3083 if (base
3084 && TREE_CODE (base) == MEM_REF)
3085 aggr_ptr_type
3086 = build_qualified_type (aggr_ptr_type,
3087 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3088 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3089 get_name (base_name));
3091 /* Vector and array types inherit the alias set of their component
3092 type by default so we need to use a ref-all pointer if the data
3093 reference does not conflict with the created aggregated data
3094 reference because it is not addressable. */
3095 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3096 get_alias_set (DR_REF (dr))))
3098 aggr_ptr_type
3099 = build_pointer_type_for_mode (aggr_type,
3100 TYPE_MODE (aggr_ptr_type), true);
3101 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3102 get_name (base_name));
3105 /* Likewise for any of the data references in the stmt group. */
3106 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3108 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3111 tree lhs = gimple_assign_lhs (orig_stmt);
3112 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3113 get_alias_set (lhs)))
3115 aggr_ptr_type
3116 = build_pointer_type_for_mode (aggr_type,
3117 TYPE_MODE (aggr_ptr_type), true);
3118 aggr_ptr
3119 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3120 get_name (base_name));
3121 break;
3124 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3126 while (orig_stmt);
3129 add_referenced_var (aggr_ptr);
3131 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3132 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3133 def-use update cycles for the pointer: one relative to the outer-loop
3134 (LOOP), which is what steps (3) and (4) below do. The other is relative
3135 to the inner-loop (which is the inner-most loop containing the dataref),
3136 and this is done be step (5) below.
3138 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3139 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3140 redundant. Steps (3),(4) create the following:
3142 vp0 = &base_addr;
3143 LOOP: vp1 = phi(vp0,vp2)
3146 vp2 = vp1 + step
3147 goto LOOP
3149 If there is an inner-loop nested in loop, then step (5) will also be
3150 applied, and an additional update in the inner-loop will be created:
3152 vp0 = &base_addr;
3153 LOOP: vp1 = phi(vp0,vp2)
3155 inner: vp3 = phi(vp1,vp4)
3156 vp4 = vp3 + inner_step
3157 if () goto inner
3159 vp2 = vp1 + step
3160 if () goto LOOP */
3162 /* (2) Calculate the initial address of the aggregate-pointer, and set
3163 the aggregate-pointer to point to it before the loop. */
3165 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3167 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3168 offset, loop);
3169 if (new_stmt_list)
3171 if (pe)
3173 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3174 gcc_assert (!new_bb);
3176 else
3177 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3180 *initial_address = new_temp;
3182 /* Create: p = (aggr_type *) initial_base */
3183 if (TREE_CODE (new_temp) != SSA_NAME
3184 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3186 vec_stmt = gimple_build_assign (aggr_ptr,
3187 fold_convert (aggr_ptr_type, new_temp));
3188 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3189 /* Copy the points-to information if it exists. */
3190 if (DR_PTR_INFO (dr))
3191 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3192 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3193 if (pe)
3195 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3196 gcc_assert (!new_bb);
3198 else
3199 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3201 else
3202 aggr_ptr_init = new_temp;
3204 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3205 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3206 inner-loop nested in LOOP (during outer-loop vectorization). */
3208 /* No update in loop is required. */
3209 if (only_init && (!loop_vinfo || at_loop == loop))
3210 aptr = aggr_ptr_init;
3211 else
3213 /* The step of the aggregate pointer is the type size. */
3214 tree step = TYPE_SIZE_UNIT (aggr_type);
3215 /* One exception to the above is when the scalar step of the load in
3216 LOOP is zero. In this case the step here is also zero. */
3217 if (*inv_p)
3218 step = size_zero_node;
3219 else if (negative)
3220 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3222 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3224 create_iv (aggr_ptr_init,
3225 fold_convert (aggr_ptr_type, step),
3226 aggr_ptr, loop, &incr_gsi, insert_after,
3227 &indx_before_incr, &indx_after_incr);
3228 incr = gsi_stmt (incr_gsi);
3229 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3231 /* Copy the points-to information if it exists. */
3232 if (DR_PTR_INFO (dr))
3234 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3235 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3237 if (ptr_incr)
3238 *ptr_incr = incr;
3240 aptr = indx_before_incr;
3243 if (!nested_in_vect_loop || only_init)
3244 return aptr;
3247 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3248 nested in LOOP, if exists. */
3250 gcc_assert (nested_in_vect_loop);
3251 if (!only_init)
3253 standard_iv_increment_position (containing_loop, &incr_gsi,
3254 &insert_after);
3255 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3256 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3257 &indx_after_incr);
3258 incr = gsi_stmt (incr_gsi);
3259 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3261 /* Copy the points-to information if it exists. */
3262 if (DR_PTR_INFO (dr))
3264 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3265 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3267 if (ptr_incr)
3268 *ptr_incr = incr;
3270 return indx_before_incr;
3272 else
3273 gcc_unreachable ();
3277 /* Function bump_vector_ptr
3279 Increment a pointer (to a vector type) by vector-size. If requested,
3280 i.e. if PTR-INCR is given, then also connect the new increment stmt
3281 to the existing def-use update-chain of the pointer, by modifying
3282 the PTR_INCR as illustrated below:
3284 The pointer def-use update-chain before this function:
3285 DATAREF_PTR = phi (p_0, p_2)
3286 ....
3287 PTR_INCR: p_2 = DATAREF_PTR + step
3289 The pointer def-use update-chain after this function:
3290 DATAREF_PTR = phi (p_0, p_2)
3291 ....
3292 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3293 ....
3294 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3296 Input:
3297 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3298 in the loop.
3299 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3300 the loop. The increment amount across iterations is expected
3301 to be vector_size.
3302 BSI - location where the new update stmt is to be placed.
3303 STMT - the original scalar memory-access stmt that is being vectorized.
3304 BUMP - optional. The offset by which to bump the pointer. If not given,
3305 the offset is assumed to be vector_size.
3307 Output: Return NEW_DATAREF_PTR as illustrated above.
3311 tree
3312 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3313 gimple stmt, tree bump)
3315 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3316 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3317 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3318 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3319 tree update = TYPE_SIZE_UNIT (vectype);
3320 gimple incr_stmt;
3321 ssa_op_iter iter;
3322 use_operand_p use_p;
3323 tree new_dataref_ptr;
3325 if (bump)
3326 update = bump;
3328 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3329 dataref_ptr, update);
3330 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3331 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3332 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3334 /* Copy the points-to information if it exists. */
3335 if (DR_PTR_INFO (dr))
3337 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3338 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3339 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3342 if (!ptr_incr)
3343 return new_dataref_ptr;
3345 /* Update the vector-pointer's cross-iteration increment. */
3346 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3348 tree use = USE_FROM_PTR (use_p);
3350 if (use == dataref_ptr)
3351 SET_USE (use_p, new_dataref_ptr);
3352 else
3353 gcc_assert (tree_int_cst_compare (use, update) == 0);
3356 return new_dataref_ptr;
3360 /* Function vect_create_destination_var.
3362 Create a new temporary of type VECTYPE. */
3364 tree
3365 vect_create_destination_var (tree scalar_dest, tree vectype)
3367 tree vec_dest;
3368 const char *new_name;
3369 tree type;
3370 enum vect_var_kind kind;
3372 kind = vectype ? vect_simple_var : vect_scalar_var;
3373 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3375 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3377 new_name = get_name (scalar_dest);
3378 if (!new_name)
3379 new_name = "var_";
3380 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3381 add_referenced_var (vec_dest);
3383 return vec_dest;
3386 /* Function vect_strided_store_supported.
3388 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3389 and FALSE otherwise. */
3391 bool
3392 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3394 optab interleave_high_optab, interleave_low_optab;
3395 enum machine_mode mode;
3397 mode = TYPE_MODE (vectype);
3399 /* vect_permute_store_chain requires the group size to be a power of two. */
3400 if (exact_log2 (count) == -1)
3402 if (vect_print_dump_info (REPORT_DETAILS))
3403 fprintf (vect_dump, "the size of the group of strided accesses"
3404 " is not a power of 2");
3405 return false;
3408 /* Check that the operation is supported. */
3409 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3410 vectype, optab_default);
3411 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3412 vectype, optab_default);
3413 if (!interleave_high_optab || !interleave_low_optab)
3415 if (vect_print_dump_info (REPORT_DETAILS))
3416 fprintf (vect_dump, "no optab for interleave.");
3417 return false;
3420 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3421 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3423 if (vect_print_dump_info (REPORT_DETAILS))
3424 fprintf (vect_dump, "interleave op not supported by target.");
3425 return false;
3428 return true;
3432 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3433 type VECTYPE. */
3435 bool
3436 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3438 return vect_lanes_optab_supported_p ("vec_store_lanes",
3439 vec_store_lanes_optab,
3440 vectype, count);
3444 /* Function vect_permute_store_chain.
3446 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3447 a power of 2, generate interleave_high/low stmts to reorder the data
3448 correctly for the stores. Return the final references for stores in
3449 RESULT_CHAIN.
3451 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3452 The input is 4 vectors each containing 8 elements. We assign a number to
3453 each element, the input sequence is:
3455 1st vec: 0 1 2 3 4 5 6 7
3456 2nd vec: 8 9 10 11 12 13 14 15
3457 3rd vec: 16 17 18 19 20 21 22 23
3458 4th vec: 24 25 26 27 28 29 30 31
3460 The output sequence should be:
3462 1st vec: 0 8 16 24 1 9 17 25
3463 2nd vec: 2 10 18 26 3 11 19 27
3464 3rd vec: 4 12 20 28 5 13 21 30
3465 4th vec: 6 14 22 30 7 15 23 31
3467 i.e., we interleave the contents of the four vectors in their order.
3469 We use interleave_high/low instructions to create such output. The input of
3470 each interleave_high/low operation is two vectors:
3471 1st vec 2nd vec
3472 0 1 2 3 4 5 6 7
3473 the even elements of the result vector are obtained left-to-right from the
3474 high/low elements of the first vector. The odd elements of the result are
3475 obtained left-to-right from the high/low elements of the second vector.
3476 The output of interleave_high will be: 0 4 1 5
3477 and of interleave_low: 2 6 3 7
3480 The permutation is done in log LENGTH stages. In each stage interleave_high
3481 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3482 where the first argument is taken from the first half of DR_CHAIN and the
3483 second argument from it's second half.
3484 In our example,
3486 I1: interleave_high (1st vec, 3rd vec)
3487 I2: interleave_low (1st vec, 3rd vec)
3488 I3: interleave_high (2nd vec, 4th vec)
3489 I4: interleave_low (2nd vec, 4th vec)
3491 The output for the first stage is:
3493 I1: 0 16 1 17 2 18 3 19
3494 I2: 4 20 5 21 6 22 7 23
3495 I3: 8 24 9 25 10 26 11 27
3496 I4: 12 28 13 29 14 30 15 31
3498 The output of the second stage, i.e. the final result is:
3500 I1: 0 8 16 24 1 9 17 25
3501 I2: 2 10 18 26 3 11 19 27
3502 I3: 4 12 20 28 5 13 21 30
3503 I4: 6 14 22 30 7 15 23 31. */
3505 void
3506 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3507 unsigned int length,
3508 gimple stmt,
3509 gimple_stmt_iterator *gsi,
3510 VEC(tree,heap) **result_chain)
3512 tree perm_dest, vect1, vect2, high, low;
3513 gimple perm_stmt;
3514 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3515 int i;
3516 unsigned int j;
3517 enum tree_code high_code, low_code;
3519 gcc_assert (vect_strided_store_supported (vectype, length));
3521 *result_chain = VEC_copy (tree, heap, dr_chain);
3523 for (i = 0; i < exact_log2 (length); i++)
3525 for (j = 0; j < length/2; j++)
3527 vect1 = VEC_index (tree, dr_chain, j);
3528 vect2 = VEC_index (tree, dr_chain, j+length/2);
3530 /* Create interleaving stmt:
3531 in the case of big endian:
3532 high = interleave_high (vect1, vect2)
3533 and in the case of little endian:
3534 high = interleave_low (vect1, vect2). */
3535 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3536 DECL_GIMPLE_REG_P (perm_dest) = 1;
3537 add_referenced_var (perm_dest);
3538 if (BYTES_BIG_ENDIAN)
3540 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3541 low_code = VEC_INTERLEAVE_LOW_EXPR;
3543 else
3545 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3546 high_code = VEC_INTERLEAVE_LOW_EXPR;
3548 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3549 vect1, vect2);
3550 high = make_ssa_name (perm_dest, perm_stmt);
3551 gimple_assign_set_lhs (perm_stmt, high);
3552 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3553 VEC_replace (tree, *result_chain, 2*j, high);
3555 /* Create interleaving stmt:
3556 in the case of big endian:
3557 low = interleave_low (vect1, vect2)
3558 and in the case of little endian:
3559 low = interleave_high (vect1, vect2). */
3560 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3561 DECL_GIMPLE_REG_P (perm_dest) = 1;
3562 add_referenced_var (perm_dest);
3563 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3564 vect1, vect2);
3565 low = make_ssa_name (perm_dest, perm_stmt);
3566 gimple_assign_set_lhs (perm_stmt, low);
3567 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3568 VEC_replace (tree, *result_chain, 2*j+1, low);
3570 dr_chain = VEC_copy (tree, heap, *result_chain);
3574 /* Function vect_setup_realignment
3576 This function is called when vectorizing an unaligned load using
3577 the dr_explicit_realign[_optimized] scheme.
3578 This function generates the following code at the loop prolog:
3580 p = initial_addr;
3581 x msq_init = *(floor(p)); # prolog load
3582 realignment_token = call target_builtin;
3583 loop:
3584 x msq = phi (msq_init, ---)
3586 The stmts marked with x are generated only for the case of
3587 dr_explicit_realign_optimized.
3589 The code above sets up a new (vector) pointer, pointing to the first
3590 location accessed by STMT, and a "floor-aligned" load using that pointer.
3591 It also generates code to compute the "realignment-token" (if the relevant
3592 target hook was defined), and creates a phi-node at the loop-header bb
3593 whose arguments are the result of the prolog-load (created by this
3594 function) and the result of a load that takes place in the loop (to be
3595 created by the caller to this function).
3597 For the case of dr_explicit_realign_optimized:
3598 The caller to this function uses the phi-result (msq) to create the
3599 realignment code inside the loop, and sets up the missing phi argument,
3600 as follows:
3601 loop:
3602 msq = phi (msq_init, lsq)
3603 lsq = *(floor(p')); # load in loop
3604 result = realign_load (msq, lsq, realignment_token);
3606 For the case of dr_explicit_realign:
3607 loop:
3608 msq = *(floor(p)); # load in loop
3609 p' = p + (VS-1);
3610 lsq = *(floor(p')); # load in loop
3611 result = realign_load (msq, lsq, realignment_token);
3613 Input:
3614 STMT - (scalar) load stmt to be vectorized. This load accesses
3615 a memory location that may be unaligned.
3616 BSI - place where new code is to be inserted.
3617 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3618 is used.
3620 Output:
3621 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3622 target hook, if defined.
3623 Return value - the result of the loop-header phi node. */
3625 tree
3626 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3627 tree *realignment_token,
3628 enum dr_alignment_support alignment_support_scheme,
3629 tree init_addr,
3630 struct loop **at_loop)
3632 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3633 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3634 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3635 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3636 struct loop *loop = NULL;
3637 edge pe = NULL;
3638 tree scalar_dest = gimple_assign_lhs (stmt);
3639 tree vec_dest;
3640 gimple inc;
3641 tree ptr;
3642 tree data_ref;
3643 gimple new_stmt;
3644 basic_block new_bb;
3645 tree msq_init = NULL_TREE;
3646 tree new_temp;
3647 gimple phi_stmt;
3648 tree msq = NULL_TREE;
3649 gimple_seq stmts = NULL;
3650 bool inv_p;
3651 bool compute_in_loop = false;
3652 bool nested_in_vect_loop = false;
3653 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3654 struct loop *loop_for_initial_load = NULL;
3656 if (loop_vinfo)
3658 loop = LOOP_VINFO_LOOP (loop_vinfo);
3659 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3662 gcc_assert (alignment_support_scheme == dr_explicit_realign
3663 || alignment_support_scheme == dr_explicit_realign_optimized);
3665 /* We need to generate three things:
3666 1. the misalignment computation
3667 2. the extra vector load (for the optimized realignment scheme).
3668 3. the phi node for the two vectors from which the realignment is
3669 done (for the optimized realignment scheme). */
3671 /* 1. Determine where to generate the misalignment computation.
3673 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3674 calculation will be generated by this function, outside the loop (in the
3675 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3676 caller, inside the loop.
3678 Background: If the misalignment remains fixed throughout the iterations of
3679 the loop, then both realignment schemes are applicable, and also the
3680 misalignment computation can be done outside LOOP. This is because we are
3681 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3682 are a multiple of VS (the Vector Size), and therefore the misalignment in
3683 different vectorized LOOP iterations is always the same.
3684 The problem arises only if the memory access is in an inner-loop nested
3685 inside LOOP, which is now being vectorized using outer-loop vectorization.
3686 This is the only case when the misalignment of the memory access may not
3687 remain fixed throughout the iterations of the inner-loop (as explained in
3688 detail in vect_supportable_dr_alignment). In this case, not only is the
3689 optimized realignment scheme not applicable, but also the misalignment
3690 computation (and generation of the realignment token that is passed to
3691 REALIGN_LOAD) have to be done inside the loop.
3693 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3694 or not, which in turn determines if the misalignment is computed inside
3695 the inner-loop, or outside LOOP. */
3697 if (init_addr != NULL_TREE || !loop_vinfo)
3699 compute_in_loop = true;
3700 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3704 /* 2. Determine where to generate the extra vector load.
3706 For the optimized realignment scheme, instead of generating two vector
3707 loads in each iteration, we generate a single extra vector load in the
3708 preheader of the loop, and in each iteration reuse the result of the
3709 vector load from the previous iteration. In case the memory access is in
3710 an inner-loop nested inside LOOP, which is now being vectorized using
3711 outer-loop vectorization, we need to determine whether this initial vector
3712 load should be generated at the preheader of the inner-loop, or can be
3713 generated at the preheader of LOOP. If the memory access has no evolution
3714 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3715 to be generated inside LOOP (in the preheader of the inner-loop). */
3717 if (nested_in_vect_loop)
3719 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3720 bool invariant_in_outerloop =
3721 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3722 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3724 else
3725 loop_for_initial_load = loop;
3726 if (at_loop)
3727 *at_loop = loop_for_initial_load;
3729 if (loop_for_initial_load)
3730 pe = loop_preheader_edge (loop_for_initial_load);
3732 /* 3. For the case of the optimized realignment, create the first vector
3733 load at the loop preheader. */
3735 if (alignment_support_scheme == dr_explicit_realign_optimized)
3737 /* Create msq_init = *(floor(p1)) in the loop preheader */
3739 gcc_assert (!compute_in_loop);
3740 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3741 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
3742 NULL_TREE, &init_addr, NULL, &inc,
3743 true, &inv_p);
3744 new_stmt = gimple_build_assign_with_ops
3745 (BIT_AND_EXPR, NULL_TREE, ptr,
3746 build_int_cst (TREE_TYPE (ptr),
3747 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3748 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3749 gimple_assign_set_lhs (new_stmt, new_temp);
3750 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3751 gcc_assert (!new_bb);
3752 data_ref
3753 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3754 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3755 new_stmt = gimple_build_assign (vec_dest, data_ref);
3756 new_temp = make_ssa_name (vec_dest, new_stmt);
3757 gimple_assign_set_lhs (new_stmt, new_temp);
3758 mark_symbols_for_renaming (new_stmt);
3759 if (pe)
3761 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3762 gcc_assert (!new_bb);
3764 else
3765 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3767 msq_init = gimple_assign_lhs (new_stmt);
3770 /* 4. Create realignment token using a target builtin, if available.
3771 It is done either inside the containing loop, or before LOOP (as
3772 determined above). */
3774 if (targetm.vectorize.builtin_mask_for_load)
3776 tree builtin_decl;
3778 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
3779 if (!init_addr)
3781 /* Generate the INIT_ADDR computation outside LOOP. */
3782 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3783 NULL_TREE, loop);
3784 if (loop)
3786 pe = loop_preheader_edge (loop);
3787 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3788 gcc_assert (!new_bb);
3790 else
3791 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3794 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3795 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3796 vec_dest =
3797 vect_create_destination_var (scalar_dest,
3798 gimple_call_return_type (new_stmt));
3799 new_temp = make_ssa_name (vec_dest, new_stmt);
3800 gimple_call_set_lhs (new_stmt, new_temp);
3802 if (compute_in_loop)
3803 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3804 else
3806 /* Generate the misalignment computation outside LOOP. */
3807 pe = loop_preheader_edge (loop);
3808 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3809 gcc_assert (!new_bb);
3812 *realignment_token = gimple_call_lhs (new_stmt);
3814 /* The result of the CALL_EXPR to this builtin is determined from
3815 the value of the parameter and no global variables are touched
3816 which makes the builtin a "const" function. Requiring the
3817 builtin to have the "const" attribute makes it unnecessary
3818 to call mark_call_clobbered. */
3819 gcc_assert (TREE_READONLY (builtin_decl));
3822 if (alignment_support_scheme == dr_explicit_realign)
3823 return msq;
3825 gcc_assert (!compute_in_loop);
3826 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3829 /* 5. Create msq = phi <msq_init, lsq> in loop */
3831 pe = loop_preheader_edge (containing_loop);
3832 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3833 msq = make_ssa_name (vec_dest, NULL);
3834 phi_stmt = create_phi_node (msq, containing_loop->header);
3835 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3836 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3838 return msq;
3842 /* Function vect_strided_load_supported.
3844 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3845 and FALSE otherwise. */
3847 bool
3848 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
3850 optab perm_even_optab, perm_odd_optab;
3851 enum machine_mode mode;
3853 mode = TYPE_MODE (vectype);
3855 /* vect_permute_load_chain requires the group size to be a power of two. */
3856 if (exact_log2 (count) == -1)
3858 if (vect_print_dump_info (REPORT_DETAILS))
3859 fprintf (vect_dump, "the size of the group of strided accesses"
3860 " is not a power of 2");
3861 return false;
3864 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3865 optab_default);
3866 if (!perm_even_optab)
3868 if (vect_print_dump_info (REPORT_DETAILS))
3869 fprintf (vect_dump, "no optab for perm_even.");
3870 return false;
3873 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3875 if (vect_print_dump_info (REPORT_DETAILS))
3876 fprintf (vect_dump, "perm_even op not supported by target.");
3877 return false;
3880 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3881 optab_default);
3882 if (!perm_odd_optab)
3884 if (vect_print_dump_info (REPORT_DETAILS))
3885 fprintf (vect_dump, "no optab for perm_odd.");
3886 return false;
3889 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3891 if (vect_print_dump_info (REPORT_DETAILS))
3892 fprintf (vect_dump, "perm_odd op not supported by target.");
3893 return false;
3895 return true;
3898 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
3899 type VECTYPE. */
3901 bool
3902 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3904 return vect_lanes_optab_supported_p ("vec_load_lanes",
3905 vec_load_lanes_optab,
3906 vectype, count);
3909 /* Function vect_permute_load_chain.
3911 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3912 a power of 2, generate extract_even/odd stmts to reorder the input data
3913 correctly. Return the final references for loads in RESULT_CHAIN.
3915 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3916 The input is 4 vectors each containing 8 elements. We assign a number to each
3917 element, the input sequence is:
3919 1st vec: 0 1 2 3 4 5 6 7
3920 2nd vec: 8 9 10 11 12 13 14 15
3921 3rd vec: 16 17 18 19 20 21 22 23
3922 4th vec: 24 25 26 27 28 29 30 31
3924 The output sequence should be:
3926 1st vec: 0 4 8 12 16 20 24 28
3927 2nd vec: 1 5 9 13 17 21 25 29
3928 3rd vec: 2 6 10 14 18 22 26 30
3929 4th vec: 3 7 11 15 19 23 27 31
3931 i.e., the first output vector should contain the first elements of each
3932 interleaving group, etc.
3934 We use extract_even/odd instructions to create such output. The input of
3935 each extract_even/odd operation is two vectors
3936 1st vec 2nd vec
3937 0 1 2 3 4 5 6 7
3939 and the output is the vector of extracted even/odd elements. The output of
3940 extract_even will be: 0 2 4 6
3941 and of extract_odd: 1 3 5 7
3944 The permutation is done in log LENGTH stages. In each stage extract_even
3945 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3946 their order. In our example,
3948 E1: extract_even (1st vec, 2nd vec)
3949 E2: extract_odd (1st vec, 2nd vec)
3950 E3: extract_even (3rd vec, 4th vec)
3951 E4: extract_odd (3rd vec, 4th vec)
3953 The output for the first stage will be:
3955 E1: 0 2 4 6 8 10 12 14
3956 E2: 1 3 5 7 9 11 13 15
3957 E3: 16 18 20 22 24 26 28 30
3958 E4: 17 19 21 23 25 27 29 31
3960 In order to proceed and create the correct sequence for the next stage (or
3961 for the correct output, if the second stage is the last one, as in our
3962 example), we first put the output of extract_even operation and then the
3963 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3964 The input for the second stage is:
3966 1st vec (E1): 0 2 4 6 8 10 12 14
3967 2nd vec (E3): 16 18 20 22 24 26 28 30
3968 3rd vec (E2): 1 3 5 7 9 11 13 15
3969 4th vec (E4): 17 19 21 23 25 27 29 31
3971 The output of the second stage:
3973 E1: 0 4 8 12 16 20 24 28
3974 E2: 2 6 10 14 18 22 26 30
3975 E3: 1 5 9 13 17 21 25 29
3976 E4: 3 7 11 15 19 23 27 31
3978 And RESULT_CHAIN after reordering:
3980 1st vec (E1): 0 4 8 12 16 20 24 28
3981 2nd vec (E3): 1 5 9 13 17 21 25 29
3982 3rd vec (E2): 2 6 10 14 18 22 26 30
3983 4th vec (E4): 3 7 11 15 19 23 27 31. */
3985 static void
3986 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3987 unsigned int length,
3988 gimple stmt,
3989 gimple_stmt_iterator *gsi,
3990 VEC(tree,heap) **result_chain)
3992 tree perm_dest, data_ref, first_vect, second_vect;
3993 gimple perm_stmt;
3994 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3995 int i;
3996 unsigned int j;
3998 gcc_assert (vect_strided_load_supported (vectype, length));
4000 *result_chain = VEC_copy (tree, heap, dr_chain);
4001 for (i = 0; i < exact_log2 (length); i++)
4003 for (j = 0; j < length; j +=2)
4005 first_vect = VEC_index (tree, dr_chain, j);
4006 second_vect = VEC_index (tree, dr_chain, j+1);
4008 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4009 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4010 DECL_GIMPLE_REG_P (perm_dest) = 1;
4011 add_referenced_var (perm_dest);
4013 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
4014 perm_dest, first_vect,
4015 second_vect);
4017 data_ref = make_ssa_name (perm_dest, perm_stmt);
4018 gimple_assign_set_lhs (perm_stmt, data_ref);
4019 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4020 mark_symbols_for_renaming (perm_stmt);
4022 VEC_replace (tree, *result_chain, j/2, data_ref);
4024 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4025 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4026 DECL_GIMPLE_REG_P (perm_dest) = 1;
4027 add_referenced_var (perm_dest);
4029 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
4030 perm_dest, first_vect,
4031 second_vect);
4032 data_ref = make_ssa_name (perm_dest, perm_stmt);
4033 gimple_assign_set_lhs (perm_stmt, data_ref);
4034 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4035 mark_symbols_for_renaming (perm_stmt);
4037 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4039 dr_chain = VEC_copy (tree, heap, *result_chain);
4044 /* Function vect_transform_strided_load.
4046 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4047 to perform their permutation and ascribe the result vectorized statements to
4048 the scalar statements.
4051 void
4052 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4053 gimple_stmt_iterator *gsi)
4055 VEC(tree,heap) *result_chain = NULL;
4057 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4058 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4059 vectors, that are ready for vector computation. */
4060 result_chain = VEC_alloc (tree, heap, size);
4061 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4062 vect_record_strided_load_vectors (stmt, result_chain);
4063 VEC_free (tree, heap, result_chain);
4066 /* RESULT_CHAIN contains the output of a group of strided loads that were
4067 generated as part of the vectorization of STMT. Assign the statement
4068 for each vector to the associated scalar statement. */
4070 void
4071 vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4073 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4074 gimple next_stmt, new_stmt;
4075 unsigned int i, gap_count;
4076 tree tmp_data_ref;
4078 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4079 Since we scan the chain starting from it's first node, their order
4080 corresponds the order of data-refs in RESULT_CHAIN. */
4081 next_stmt = first_stmt;
4082 gap_count = 1;
4083 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4085 if (!next_stmt)
4086 break;
4088 /* Skip the gaps. Loads created for the gaps will be removed by dead
4089 code elimination pass later. No need to check for the first stmt in
4090 the group, since it always exists.
4091 GROUP_GAP is the number of steps in elements from the previous
4092 access (if there is no gap GROUP_GAP is 1). We skip loads that
4093 correspond to the gaps. */
4094 if (next_stmt != first_stmt
4095 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4097 gap_count++;
4098 continue;
4101 while (next_stmt)
4103 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4104 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4105 copies, and we put the new vector statement in the first available
4106 RELATED_STMT. */
4107 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4108 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4109 else
4111 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4113 gimple prev_stmt =
4114 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4115 gimple rel_stmt =
4116 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4117 while (rel_stmt)
4119 prev_stmt = rel_stmt;
4120 rel_stmt =
4121 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4124 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4125 new_stmt;
4129 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4130 gap_count = 1;
4131 /* If NEXT_STMT accesses the same DR as the previous statement,
4132 put the same TMP_DATA_REF as its vectorized statement; otherwise
4133 get the next data-ref from RESULT_CHAIN. */
4134 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4135 break;
4140 /* Function vect_force_dr_alignment_p.
4142 Returns whether the alignment of a DECL can be forced to be aligned
4143 on ALIGNMENT bit boundary. */
4145 bool
4146 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4148 if (TREE_CODE (decl) != VAR_DECL)
4149 return false;
4151 if (DECL_EXTERNAL (decl))
4152 return false;
4154 if (TREE_ASM_WRITTEN (decl))
4155 return false;
4157 if (TREE_STATIC (decl))
4158 return (alignment <= MAX_OFILE_ALIGNMENT);
4159 else
4160 return (alignment <= MAX_STACK_ALIGNMENT);
4164 /* Return whether the data reference DR is supported with respect to its
4165 alignment.
4166 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4167 it is aligned, i.e., check if it is possible to vectorize it with different
4168 alignment. */
4170 enum dr_alignment_support
4171 vect_supportable_dr_alignment (struct data_reference *dr,
4172 bool check_aligned_accesses)
4174 gimple stmt = DR_STMT (dr);
4175 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4176 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4177 enum machine_mode mode = TYPE_MODE (vectype);
4178 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4179 struct loop *vect_loop = NULL;
4180 bool nested_in_vect_loop = false;
4182 if (aligned_access_p (dr) && !check_aligned_accesses)
4183 return dr_aligned;
4185 if (loop_vinfo)
4187 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4188 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4191 /* Possibly unaligned access. */
4193 /* We can choose between using the implicit realignment scheme (generating
4194 a misaligned_move stmt) and the explicit realignment scheme (generating
4195 aligned loads with a REALIGN_LOAD). There are two variants to the
4196 explicit realignment scheme: optimized, and unoptimized.
4197 We can optimize the realignment only if the step between consecutive
4198 vector loads is equal to the vector size. Since the vector memory
4199 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4200 is guaranteed that the misalignment amount remains the same throughout the
4201 execution of the vectorized loop. Therefore, we can create the
4202 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4203 at the loop preheader.
4205 However, in the case of outer-loop vectorization, when vectorizing a
4206 memory access in the inner-loop nested within the LOOP that is now being
4207 vectorized, while it is guaranteed that the misalignment of the
4208 vectorized memory access will remain the same in different outer-loop
4209 iterations, it is *not* guaranteed that is will remain the same throughout
4210 the execution of the inner-loop. This is because the inner-loop advances
4211 with the original scalar step (and not in steps of VS). If the inner-loop
4212 step happens to be a multiple of VS, then the misalignment remains fixed
4213 and we can use the optimized realignment scheme. For example:
4215 for (i=0; i<N; i++)
4216 for (j=0; j<M; j++)
4217 s += a[i+j];
4219 When vectorizing the i-loop in the above example, the step between
4220 consecutive vector loads is 1, and so the misalignment does not remain
4221 fixed across the execution of the inner-loop, and the realignment cannot
4222 be optimized (as illustrated in the following pseudo vectorized loop):
4224 for (i=0; i<N; i+=4)
4225 for (j=0; j<M; j++){
4226 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4227 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4228 // (assuming that we start from an aligned address).
4231 We therefore have to use the unoptimized realignment scheme:
4233 for (i=0; i<N; i+=4)
4234 for (j=k; j<M; j+=4)
4235 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4236 // that the misalignment of the initial address is
4237 // 0).
4239 The loop can then be vectorized as follows:
4241 for (k=0; k<4; k++){
4242 rt = get_realignment_token (&vp[k]);
4243 for (i=0; i<N; i+=4){
4244 v1 = vp[i+k];
4245 for (j=k; j<M; j+=4){
4246 v2 = vp[i+j+VS-1];
4247 va = REALIGN_LOAD <v1,v2,rt>;
4248 vs += va;
4249 v1 = v2;
4252 } */
4254 if (DR_IS_READ (dr))
4256 bool is_packed = false;
4257 tree type = (TREE_TYPE (DR_REF (dr)));
4259 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4260 && (!targetm.vectorize.builtin_mask_for_load
4261 || targetm.vectorize.builtin_mask_for_load ()))
4263 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4264 if ((nested_in_vect_loop
4265 && (TREE_INT_CST_LOW (DR_STEP (dr))
4266 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4267 || !loop_vinfo)
4268 return dr_explicit_realign;
4269 else
4270 return dr_explicit_realign_optimized;
4272 if (!known_alignment_for_access_p (dr))
4274 tree ba = DR_BASE_OBJECT (dr);
4276 if (ba)
4277 is_packed = contains_packed_reference (ba);
4280 if (targetm.vectorize.
4281 support_vector_misalignment (mode, type,
4282 DR_MISALIGNMENT (dr), is_packed))
4283 /* Can't software pipeline the loads, but can at least do them. */
4284 return dr_unaligned_supported;
4286 else
4288 bool is_packed = false;
4289 tree type = (TREE_TYPE (DR_REF (dr)));
4291 if (!known_alignment_for_access_p (dr))
4293 tree ba = DR_BASE_OBJECT (dr);
4295 if (ba)
4296 is_packed = contains_packed_reference (ba);
4299 if (targetm.vectorize.
4300 support_vector_misalignment (mode, type,
4301 DR_MISALIGNMENT (dr), is_packed))
4302 return dr_unaligned_supported;
4305 /* Unsupported. */
4306 return dr_unaligned_unsupported;