[official-gcc.git] / gcc / tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
43 #include "expr.h"
44 #include "optabs.h"
46 /* Return true if load- or store-lanes optab OPTAB is implemented for
47 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
49 static bool
50 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
51 tree vectype, unsigned HOST_WIDE_INT count)
53 enum machine_mode mode, array_mode;
54 bool limit_p;
56 mode = TYPE_MODE (vectype);
57 limit_p = !targetm.array_mode_supported_p (mode, count);
58 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
59 MODE_INT, limit_p);
61 if (array_mode == BLKmode)
63 if (vect_print_dump_info (REPORT_DETAILS))
64 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65 GET_MODE_NAME (mode), count);
66 return false;
69 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
71 if (vect_print_dump_info (REPORT_DETAILS))
72 fprintf (vect_dump, "cannot use %s<%s><%s>",
73 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
74 return false;
77 if (vect_print_dump_info (REPORT_DETAILS))
78 fprintf (vect_dump, "can use %s<%s><%s>",
79 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
81 return true;
85 /* Return the smallest scalar part of STMT.
86 This is used to determine the vectype of the stmt. We generally set the
87 vectype according to the type of the result (lhs). For stmts whose
88 result-type is different than the type of the arguments (e.g., demotion,
89 promotion), vectype will be reset appropriately (later). Note that we have
90 to visit the smallest datatype in this function, because that determines the
91 VF. If the smallest datatype in the loop is present only as the rhs of a
92 promotion operation - we'd miss it.
93 Such a case, where a variable of this datatype does not appear in the lhs
94 anywhere in the loop, can only occur if it's an invariant: e.g.:
95 'int_x = (int) short_inv', which we'd expect to have been optimized away by
96 invariant motion. However, we cannot rely on invariant motion to always
97 take invariants out of the loop, and so in the case of promotion we also
98 have to check the rhs.
99 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
100 types. */
102 tree
103 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
104 HOST_WIDE_INT *rhs_size_unit)
106 tree scalar_type = gimple_expr_type (stmt);
107 HOST_WIDE_INT lhs, rhs;
109 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
111 if (is_gimple_assign (stmt)
112 && (gimple_assign_cast_p (stmt)
113 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
114 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
116 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
118 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
119 if (rhs < lhs)
120 scalar_type = rhs_type;
123 *lhs_size_unit = lhs;
124 *rhs_size_unit = rhs;
125 return scalar_type;
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
132 int
133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
135 gimple next_stmt = first_stmt;
136 int result = 0;
138 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
139 return -1;
141 while (next_stmt && next_stmt != stmt)
143 result++;
144 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
147 if (next_stmt)
148 return result;
149 else
150 return -1;
154 /* Function vect_insert_into_interleaving_chain.
156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
158 static void
159 vect_insert_into_interleaving_chain (struct data_reference *dra,
160 struct data_reference *drb)
162 gimple prev, next;
163 tree next_init;
164 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
167 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
169 while (next)
171 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
174 /* Insert here. */
175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
177 return;
179 prev = next;
180 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
183 /* We got to the end of the list. Insert here. */
184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
189 /* Function vect_update_interleaving_chain.
191 For two data-refs DRA and DRB that are a part of a chain of interleaved data
192 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
194 There are four possible cases:
195 1. New stmts - both DRA and DRB are not a part of any chain:
196 FIRST_DR = DRB
197 NEXT_DR (DRB) = DRA
198 2. DRB is a part of a chain and DRA is not:
199 no need to update FIRST_DR
200 no need to insert DRB
201 insert DRA according to init
202 3. DRA is a part of a chain and DRB is not:
203 if (init of FIRST_DR > init of DRB)
204 FIRST_DR = DRB
205 NEXT(FIRST_DR) = previous FIRST_DR
206 else
207 insert DRB according to its init
208 4. both DRA and DRB are in some interleaving chains:
209 choose the chain with the smallest init of FIRST_DR
210 insert the nodes of the second chain into the first one. */
212 static void
213 vect_update_interleaving_chain (struct data_reference *drb,
214 struct data_reference *dra)
216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218 tree next_init, init_dra_chain, init_drb_chain;
219 gimple first_a, first_b;
220 tree node_init;
221 gimple node, prev, next, first_stmt;
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
229 return;
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra, drb);
238 return;
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
246 old_first_stmt)));
247 gimple tmp;
249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp = old_first_stmt;
259 while (tmp)
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
265 else
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb, dra);
269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
271 return;
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277 if (first_a == first_b)
278 return;
279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
284 /* Insert the nodes of DRA chain into the DRB chain.
285 After inserting a node, continue from this node of the DRB chain (don't
286 start from the beginning). */
287 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289 first_stmt = first_b;
291 else
293 /* Insert the nodes of DRB chain into the DRA chain.
294 After inserting a node, continue from this node of the DRA chain (don't
295 start from the beginning). */
296 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298 first_stmt = first_a;
301 while (node)
303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
305 while (next)
307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 if (tree_int_cst_compare (next_init, node_init) > 0)
310 /* Insert here. */
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
313 prev = node;
314 break;
316 prev = next;
317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
319 if (!next)
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
324 prev = node;
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
331 /* Check dependence between DRA and DRB for basic block vectorization.
332 If the accesses share the same bases and offsets, we can compare their initial
333 constant offsets to decide whether they differ or not. In case of a read-
334 write dependence we check that the load is before the store to ensure that
335 vectorization will not change the order of the accesses. */
337 static bool
338 vect_drs_dependent_in_basic_block (struct data_reference *dra,
339 struct data_reference *drb)
341 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
342 gimple earlier_stmt;
344 /* We only call this function for pairs of loads and stores, but we verify
345 it here. */
346 if (DR_IS_READ (dra) == DR_IS_READ (drb))
348 if (DR_IS_READ (dra))
349 return false;
350 else
351 return true;
354 /* Check that the data-refs have same bases and offsets. If not, we can't
355 determine if they are dependent. */
356 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357 || !dr_equal_offsets_p (dra, drb))
358 return true;
360 /* Check the types. */
361 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
364 if (type_size_a != type_size_b
365 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366 TREE_TYPE (DR_REF (drb))))
367 return true;
369 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
372 /* Two different locations - no dependence. */
373 if (init_a != init_b)
374 return false;
376 /* We have a read-write dependence. Check that the load is before the store.
377 When we vectorize basic blocks, vector load can be only before
378 corresponding scalar load, and vector store can be only after its
379 corresponding scalar store. So the order of the accesses is preserved in
380 case the load is before the store. */
381 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
383 return false;
385 return true;
389 /* Function vect_check_interleaving.
391 Check if DRA and DRB are a part of interleaving. In case they are, insert
392 DRA and DRB in an interleaving chain. */
394 static bool
395 vect_check_interleaving (struct data_reference *dra,
396 struct data_reference *drb)
398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
400 /* Check that the data-refs have same first location (except init) and they
401 are both either store or load (not load and store). */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403 || !dr_equal_offsets_p (dra, drb)
404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405 || DR_IS_READ (dra) != DR_IS_READ (drb))
406 return false;
408 /* Check:
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between
412 data-refs' inits. */
413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
416 if (type_size_a != type_size_b
417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419 TREE_TYPE (DR_REF (drb))))
420 return false;
422 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424 step = TREE_INT_CST_LOW (DR_STEP (dra));
426 if (init_a > init_b)
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size = (init_a - init_b) % type_size_a;
432 if (step && (init_a - init_b) > step)
433 return false;
435 if (diff_mod_size == 0)
437 vect_update_interleaving_chain (drb, dra);
438 if (vect_print_dump_info (REPORT_DR_DETAILS))
440 fprintf (vect_dump, "Detected interleaving ");
441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 fprintf (vect_dump, " and ");
443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
445 return true;
448 else
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size = (init_b - init_a) % type_size_a;
454 if (step && (init_b - init_a) > step)
455 return false;
457 if (diff_mod_size == 0)
459 vect_update_interleaving_chain (dra, drb);
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 fprintf (vect_dump, "Detected interleaving ");
463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
467 return true;
471 return false;
474 /* Check if data references pointed by DR_I and DR_J are same or
475 belong to same interleaving group. Return FALSE if drs are
476 different, otherwise return TRUE. */
478 static bool
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
481 gimple stmt_i = DR_STMT (dr_i);
482 gimple stmt_j = DR_STMT (dr_j);
484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
489 return true;
490 else
491 return false;
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
497 static bool
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
504 return true;
505 else
506 return false;
509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
510 tested at run-time. Return TRUE if DDR was successfully inserted.
511 Return false if versioning is not supported. */
513 static bool
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
519 return false;
521 if (vect_print_dump_info (REPORT_DR_DETAILS))
523 fprintf (vect_dump, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525 fprintf (vect_dump, " and ");
526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
529 if (optimize_loop_nest_for_size_p (loop))
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning not supported when optimizing for size.");
533 return false;
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
537 if (loop->inner)
539 if (vect_print_dump_info (REPORT_DR_DETAILS))
540 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
541 return false;
544 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
545 return true;
549 /* Function vect_analyze_data_ref_dependence.
551 Return TRUE if there (might) exist a dependence between a memory-reference
552 DRA and a memory-reference DRB. When a run-time alias check (versioning
553 for alias) can handle the dependence, return FALSE. Adjust *MAX_VF according to
554 the data dependence. */
556 static bool
557 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
558 loop_vec_info loop_vinfo, int *max_vf,
559 bool *data_dependence_in_bb)
561 unsigned int i;
562 struct loop *loop = NULL;
563 struct data_reference *dra = DDR_A (ddr);
564 struct data_reference *drb = DDR_B (ddr);
565 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
566 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
567 lambda_vector dist_v;
568 unsigned int loop_depth;
570 /* Don't bother to analyze statements marked as unvectorizable. */
571 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
572 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
573 return false;
575 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
577 /* Independent data accesses. */
578 vect_check_interleaving (dra, drb);
579 return false;
582 if (loop_vinfo)
583 loop = LOOP_VINFO_LOOP (loop_vinfo);
585 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
586 return false;
588 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
590 if (loop_vinfo)
592 if (vect_print_dump_info (REPORT_DR_DETAILS))
594 fprintf (vect_dump, "versioning for alias required: "
595 "can't determine dependence between ");
596 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
597 fprintf (vect_dump, " and ");
598 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
601 /* Add to list of ddrs that need to be tested at run-time. */
602 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
605 /* When vectorizing a basic block, unknown dependence can still mean
606 strided access. */
607 if (vect_check_interleaving (dra, drb))
608 return false;
610 if (vect_print_dump_info (REPORT_DR_DETAILS))
612 fprintf (vect_dump, "can't determine dependence between ");
613 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
614 fprintf (vect_dump, " and ");
615 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
618 /* We do not vectorize basic blocks with write-write dependencies. */
619 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
620 return true;
622 /* We deal with read-write dependencies in basic blocks later (by
623 verifying that all the loads in the basic block are before all the
624 stores). */
625 *data_dependence_in_bb = true;
626 return false;
629 /* Versioning for alias is not yet supported for basic block SLP, and
630 dependence distance is inapplicable; hence, in case of a known data
631 dependence, basic block vectorization is impossible for now. */
632 if (!loop_vinfo)
634 if (dra != drb && vect_check_interleaving (dra, drb))
635 return false;
637 if (vect_print_dump_info (REPORT_DR_DETAILS))
639 fprintf (vect_dump, "determined dependence between ");
640 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
641 fprintf (vect_dump, " and ");
642 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
645 /* Do not vectorize basic blocks with write-write dependences. */
646 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
647 return true;
649 /* Check if this dependence is allowed in basic block vectorization. */
650 return vect_drs_dependent_in_basic_block (dra, drb);
653 /* Loop-based vectorization and known data dependence. */
654 if (DDR_NUM_DIST_VECTS (ddr) == 0)
656 if (vect_print_dump_info (REPORT_DR_DETAILS))
658 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
659 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
660 fprintf (vect_dump, " and ");
661 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
663 /* Add to list of ddrs that need to be tested at run-time. */
664 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
667 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
668 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
670 int dist = dist_v[loop_depth];
672 if (vect_print_dump_info (REPORT_DR_DETAILS))
673 fprintf (vect_dump, "dependence distance = %d.", dist);
675 if (dist == 0)
677 if (vect_print_dump_info (REPORT_DR_DETAILS))
679 fprintf (vect_dump, "dependence distance == 0 between ");
680 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
681 fprintf (vect_dump, " and ");
682 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
685 /* For interleaving, mark that there is a read-write dependency if
686 necessary. We have already checked that one of the data-refs is a store. */
687 if (DR_IS_READ (dra))
688 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
689 else
691 if (DR_IS_READ (drb))
692 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
695 continue;
698 if (dist > 0 && DDR_REVERSED_P (ddr))
700 /* If DDR_REVERSED_P the order of the data-refs in DDR was
701 reversed (to make distance vector positive), and the actual
702 distance is negative. */
703 if (vect_print_dump_info (REPORT_DR_DETAILS))
704 fprintf (vect_dump, "dependence distance negative.");
705 continue;
708 if (abs (dist) >= 2
709 && abs (dist) < *max_vf)
711 /* The dependence distance requires reduction of the maximal
712 vectorization factor. */
713 *max_vf = abs (dist);
714 if (vect_print_dump_info (REPORT_DR_DETAILS))
715 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
716 *max_vf);
719 if (abs (dist) >= *max_vf)
721 /* Dependence distance does not create dependence, as far as
722 vectorization is concerned, in this case. */
723 if (vect_print_dump_info (REPORT_DR_DETAILS))
724 fprintf (vect_dump, "dependence distance >= VF.");
725 continue;
728 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
730 fprintf (vect_dump, "not vectorized, possible dependence "
731 "between data-refs ");
732 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
733 fprintf (vect_dump, " and ");
734 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
737 return true;
740 return false;
743 /* Function vect_analyze_data_ref_dependences.
745 Examine all the data references in the loop, and make sure there do not
746 exist any data dependences between them. Set *MAX_VF according to
747 the maximum vectorization factor the data dependences allow. */
749 bool
750 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
751 bb_vec_info bb_vinfo, int *max_vf,
752 bool *data_dependence_in_bb)
754 unsigned int i;
755 VEC (ddr_p, heap) *ddrs = NULL;
756 struct data_dependence_relation *ddr;
758 if (vect_print_dump_info (REPORT_DETAILS))
759 fprintf (vect_dump, "=== vect_analyze_dependences ===");
761 if (loop_vinfo)
762 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
763 else
764 ddrs = BB_VINFO_DDRS (bb_vinfo);
766 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
767 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
768 data_dependence_in_bb))
769 return false;
771 return true;
775 /* Function vect_compute_data_ref_alignment
777 Compute the misalignment of the data reference DR.
779 Output:
780 1. If during the misalignment computation it is found that the data reference
781 cannot be vectorized then false is returned.
782 2. DR_MISALIGNMENT (DR) is defined.
784 FOR NOW: No analysis is actually performed. Misalignment is calculated
785 only for trivial cases. TODO. */
787 static bool
788 vect_compute_data_ref_alignment (struct data_reference *dr)
790 gimple stmt = DR_STMT (dr);
791 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
792 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
793 struct loop *loop = NULL;
794 tree ref = DR_REF (dr);
795 tree vectype;
796 tree base, base_addr;
797 bool base_aligned;
798 tree misalign;
799 tree aligned_to, alignment;
801 if (vect_print_dump_info (REPORT_DETAILS))
802 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
804 if (loop_vinfo)
805 loop = LOOP_VINFO_LOOP (loop_vinfo);
807 /* Initialize misalignment to unknown. */
808 SET_DR_MISALIGNMENT (dr, -1);
810 misalign = DR_INIT (dr);
811 aligned_to = DR_ALIGNED_TO (dr);
812 base_addr = DR_BASE_ADDRESS (dr);
813 vectype = STMT_VINFO_VECTYPE (stmt_info);
815 /* In case the dataref is in an inner-loop of the loop that is being
816 vectorized (LOOP), we use the base and misalignment information
817 relative to the outer-loop (LOOP). This is ok only if the misalignment
818 stays the same throughout the execution of the inner-loop, which is why
819 we have to check that the stride of the dataref in the inner-loop is
820 evenly divisible by the vector size. */
821 if (loop && nested_in_vect_loop_p (loop, stmt))
823 tree step = DR_STEP (dr);
824 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
826 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
828 if (vect_print_dump_info (REPORT_ALIGNMENT))
829 fprintf (vect_dump, "inner step divides the vector-size.");
830 misalign = STMT_VINFO_DR_INIT (stmt_info);
831 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
832 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
834 else
836 if (vect_print_dump_info (REPORT_ALIGNMENT))
837 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
838 misalign = NULL_TREE;
842 base = build_fold_indirect_ref (base_addr);
843 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
845 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
846 || !misalign)
848 if (vect_print_dump_info (REPORT_ALIGNMENT))
850 fprintf (vect_dump, "Unknown alignment for access: ");
851 print_generic_expr (vect_dump, base, TDF_SLIM);
853 return true;
856 if ((DECL_P (base)
857 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
858 alignment) >= 0)
859 || (TREE_CODE (base_addr) == SSA_NAME
860 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
861 TREE_TYPE (base_addr)))),
862 alignment) >= 0))
863 base_aligned = true;
864 else
865 base_aligned = false;
867 if (!base_aligned)
869 /* Do not change the alignment of global variables if
870 flag_section_anchors is enabled. */
871 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
872 || (TREE_STATIC (base) && flag_section_anchors))
874 if (vect_print_dump_info (REPORT_DETAILS))
876 fprintf (vect_dump, "can't force alignment of ref: ");
877 print_generic_expr (vect_dump, ref, TDF_SLIM);
879 return true;
882 /* Force the alignment of the decl.
883 NOTE: This is the only change to the code we make during
884 the analysis phase, before deciding to vectorize the loop. */
885 if (vect_print_dump_info (REPORT_DETAILS))
887 fprintf (vect_dump, "force alignment of ");
888 print_generic_expr (vect_dump, ref, TDF_SLIM);
891 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
892 DECL_USER_ALIGN (base) = 1;
895 /* At this point we assume that the base is aligned. */
896 gcc_assert (base_aligned
897 || (TREE_CODE (base) == VAR_DECL
898 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
900 /* If this is a backward running DR then first access in the larger
901 vectype actually is N-1 elements before the address in the DR.
902 Adjust misalign accordingly. */
903 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
905 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
906 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
907 otherwise we wouldn't be here. */
908 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
909 /* PLUS because DR_STEP was negative. */
910 misalign = size_binop (PLUS_EXPR, misalign, offset);
913 /* Modulo alignment. */
914 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
916 if (!host_integerp (misalign, 1))
918 /* Negative or overflowed misalignment value. */
919 if (vect_print_dump_info (REPORT_DETAILS))
920 fprintf (vect_dump, "unexpected misalign value");
921 return false;
924 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
926 if (vect_print_dump_info (REPORT_DETAILS))
928 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
929 print_generic_expr (vect_dump, ref, TDF_SLIM);
932 return true;
936 /* Function vect_compute_data_refs_alignment
938 Compute the misalignment of data references in the loop.
939 Return FALSE if a data reference is found that cannot be vectorized. */
941 static bool
942 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
943 bb_vec_info bb_vinfo)
945 VEC (data_reference_p, heap) *datarefs;
946 struct data_reference *dr;
947 unsigned int i;
949 if (loop_vinfo)
950 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
951 else
952 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
954 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
955 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
956 && !vect_compute_data_ref_alignment (dr))
958 if (bb_vinfo)
960 /* Mark unsupported statement as unvectorizable. */
961 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
962 continue;
964 else
965 return false;
968 return true;
972 /* Function vect_update_misalignment_for_peel
974 DR - the data reference whose misalignment is to be adjusted.
975 DR_PEEL - the data reference whose misalignment is being made
976 zero in the vector loop by the peel.
977 NPEEL - the number of iterations in the peel loop if the misalignment
978 of DR_PEEL is known at compile time. */
980 static void
981 vect_update_misalignment_for_peel (struct data_reference *dr,
982 struct data_reference *dr_peel, int npeel)
984 unsigned int i;
985 VEC(dr_p,heap) *same_align_drs;
986 struct data_reference *current_dr;
987 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
988 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
989 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
990 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
992 /* For interleaved data accesses the step in the loop must be multiplied by
993 the size of the interleaving group. */
994 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
995 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
996 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
997 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
999 /* It can be assumed that the data refs with the same alignment as dr_peel
1000 are aligned in the vector loop. */
1001 same_align_drs
1002 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1003 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1005 if (current_dr != dr)
1006 continue;
1007 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1008 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1009 SET_DR_MISALIGNMENT (dr, 0);
1010 return;
1013 if (known_alignment_for_access_p (dr)
1014 && known_alignment_for_access_p (dr_peel))
1016 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1017 int misal = DR_MISALIGNMENT (dr);
1018 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1019 misal += negative ? -npeel * dr_size : npeel * dr_size;
1020 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1021 SET_DR_MISALIGNMENT (dr, misal);
1022 return;
1025 if (vect_print_dump_info (REPORT_DETAILS))
1026 fprintf (vect_dump, "Setting misalignment to -1.");
1027 SET_DR_MISALIGNMENT (dr, -1);
1031 /* Function vect_verify_datarefs_alignment
1033 Return TRUE if all data references in the loop can be
1034 handled with respect to alignment. */
1036 bool
1037 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1039 VEC (data_reference_p, heap) *datarefs;
1040 struct data_reference *dr;
1041 enum dr_alignment_support supportable_dr_alignment;
1042 unsigned int i;
1044 if (loop_vinfo)
1045 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1046 else
1047 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1049 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1051 gimple stmt = DR_STMT (dr);
1052 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1054 /* For interleaving, only the alignment of the first access matters.
1055 Skip statements marked as not vectorizable. */
1056 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1057 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1058 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1059 continue;
1061 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1062 if (!supportable_dr_alignment)
1064 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1066 if (DR_IS_READ (dr))
1067 fprintf (vect_dump,
1068 "not vectorized: unsupported unaligned load.");
1069 else
1070 fprintf (vect_dump,
1071 "not vectorized: unsupported unaligned store.");
1073 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1075 return false;
1077 if (supportable_dr_alignment != dr_aligned
1078 && vect_print_dump_info (REPORT_ALIGNMENT))
1079 fprintf (vect_dump, "Vectorizing an unaligned access.");
1081 return true;
1085 /* Function vector_alignment_reachable_p
1087 Return true if vector alignment for DR is reachable by peeling
1088 a few loop iterations. Return false otherwise. */
1090 static bool
1091 vector_alignment_reachable_p (struct data_reference *dr)
1093 gimple stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1095 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1097 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1099 /* For interleaved access we peel only if the number of iterations in
1100 the prolog loop ({VF - misalignment}) is a multiple of the
1101 number of interleaved accesses. */
1102 int elem_size, mis_in_elements;
1103 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1105 /* FORNOW: handle only known alignment. */
1106 if (!known_alignment_for_access_p (dr))
1107 return false;
1109 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1110 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1112 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1113 return false;
1116 /* If misalignment is known at the compile time then allow peeling
1117 only if natural alignment is reachable through peeling. */
1118 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1120 HOST_WIDE_INT elmsize =
1121 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1122 if (vect_print_dump_info (REPORT_DETAILS))
1124 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1125 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1127 if (DR_MISALIGNMENT (dr) % elmsize)
1129 if (vect_print_dump_info (REPORT_DETAILS))
1130 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1131 return false;
1135 if (!known_alignment_for_access_p (dr))
1137 tree type = (TREE_TYPE (DR_REF (dr)));
1138 tree ba = DR_BASE_OBJECT (dr);
1139 bool is_packed = false;
1141 if (ba)
1142 is_packed = contains_packed_reference (ba);
1144 if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1145 is_packed = true;
1147 if (vect_print_dump_info (REPORT_DETAILS))
1148 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1149 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1150 return true;
1151 else
1152 return false;
1155 return true;
1159 /* Calculate the cost of the memory access represented by DR. */
1161 static void
1162 vect_get_data_access_cost (struct data_reference *dr,
1163 unsigned int *inside_cost,
1164 unsigned int *outside_cost)
1166 gimple stmt = DR_STMT (dr);
1167 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1168 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1169 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1170 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1171 int ncopies = vf / nunits;
1172 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1174 if (!supportable_dr_alignment)
1175 *inside_cost = VECT_MAX_COST;
1176 else
1178 if (DR_IS_READ (dr))
1179 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1180 else
1181 vect_get_store_cost (dr, ncopies, inside_cost);
1184 if (vect_print_dump_info (REPORT_COST))
1185 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1186 "outside_cost = %d.", *inside_cost, *outside_cost);
1190 static hashval_t
1191 vect_peeling_hash (const void *elem)
1193 const struct _vect_peel_info *peel_info;
1195 peel_info = (const struct _vect_peel_info *) elem;
1196 return (hashval_t) peel_info->npeel;
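/* Equality function for the peeling hash table: two entries are equal iff
   they peel the same number of iterations.  */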
1200 static int
1201 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1203 const struct _vect_peel_info *a, *b;
1205 a = (const struct _vect_peel_info *) elem1;
1206 b = (const struct _vect_peel_info *) elem2;
1207 return (a->npeel == b->npeel);
1211 /* Insert DR into peeling hash table with NPEEL as key. */
1213 static void
1214 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1215 int npeel)
1217 struct _vect_peel_info elem, *slot;
1218 void **new_slot;
1219 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1221 elem.npeel = npeel;
1222 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1223 &elem);
1224 if (slot)
1225 slot->count++;
1226 else
1228 slot = XNEW (struct _vect_peel_info);
1229 slot->npeel = npeel;
1230 slot->dr = dr;
1231 slot->count = 1;
1232 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1233 INSERT);
1234 *new_slot = slot;
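/* If the access is not supported when misaligned and the cost model is
   disabled, give this peeling option a large weight so that the
   most-frequent selection prefers it.  */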
1237 if (!supportable_dr_alignment && !flag_vect_cost_model)
1238 slot->count += VECT_MAX_COST;
1242 /* Traverse peeling hash table to find peeling option that aligns maximum
1243 number of data accesses. */
1245 static int
1246 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1248 vect_peel_info elem = (vect_peel_info) *slot;
1249 vect_peel_extended_info max = (vect_peel_extended_info) data;
1251 if (elem->count > max->peel_info.count)
1253 max->peel_info.npeel = elem->npeel;
1254 max->peel_info.count = elem->count;
1255 max->peel_info.dr = elem->dr;
1258 return 1;
1262 /* Traverse peeling hash table and calculate cost for each peeling option.
1263 Find the one with the lowest cost. */
1265 static int
1266 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1268 vect_peel_info elem = (vect_peel_info) *slot;
1269 vect_peel_extended_info min = (vect_peel_extended_info) data;
1270 int save_misalignment, dummy;
1271 unsigned int inside_cost = 0, outside_cost = 0, i;
1272 gimple stmt = DR_STMT (elem->dr);
1273 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1274 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1275 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1276 struct data_reference *dr;
1278 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1280 stmt = DR_STMT (dr);
1281 stmt_info = vinfo_for_stmt (stmt);
1282 /* For interleaving, only the alignment of the first access
1283 matters. */
1284 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1285 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1286 continue;
1288 save_misalignment = DR_MISALIGNMENT (dr);
1289 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1290 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1291 SET_DR_MISALIGNMENT (dr, save_misalignment);
1294 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1295 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1297 if (inside_cost < min->inside_cost
1298 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1300 min->inside_cost = inside_cost;
1301 min->outside_cost = outside_cost;
1302 min->peel_info.dr = elem->dr;
1303 min->peel_info.npeel = elem->npeel;
1306 return 1;
1310 /* Choose best peeling option by traversing peeling hash table and either
1311 choosing an option with the lowest cost (if cost model is enabled) or the
1312 option that aligns as many accesses as possible. */
1314 static struct data_reference *
1315 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1316 unsigned int *npeel)
1318 struct _vect_peel_extended_info res;
1320 res.peel_info.dr = NULL;
1322 if (flag_vect_cost_model)
1324 res.inside_cost = INT_MAX;
1325 res.outside_cost = INT_MAX;
1326 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1327 vect_peeling_hash_get_lowest_cost, &res);
1329 else
1331 res.peel_info.count = 0;
1332 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1333 vect_peeling_hash_get_most_frequent, &res);
1336 *npeel = res.peel_info.npeel;
1337 return res.peel_info.dr;
1341 /* Function vect_enhance_data_refs_alignment
1343 This pass will use loop versioning and loop peeling in order to enhance
1344 the alignment of data references in the loop.
1346 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1347 original loop is to be vectorized. Any other loops that are created by
1348 the transformations performed in this pass - are not supposed to be
1349 vectorized. This restriction will be relaxed.
1351 This pass will require a cost model to guide it whether to apply peeling
1352 or versioning or a combination of the two. For example, the scheme that
1353 Intel uses when given a loop with several memory accesses, is as follows:
1354 choose one memory access ('p') whose alignment you want to force by doing
1355 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1356 other accesses are not necessarily aligned, or (2) use loop versioning to
1357 generate one loop in which all accesses are aligned, and another loop in
1358 which only 'p' is necessarily aligned.
1360 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1361 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1362 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1364 Devising a cost model is the most critical aspect of this work. It will
1365 guide us on which access to peel for, whether to use loop versioning, how
1366 many versions to create, etc. The cost model will probably consist of
1367 generic considerations as well as target specific considerations (on
1368 powerpc for example, misaligned stores are more painful than misaligned
1369 loads).
1371 Here are the general steps involved in alignment enhancements:
1373 -- original loop, before alignment analysis:
1374 for (i=0; i<N; i++){
1375 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1376 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1379 -- After vect_compute_data_refs_alignment:
1380 for (i=0; i<N; i++){
1381 x = q[i]; # DR_MISALIGNMENT(q) = 3
1382 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1385 -- Possibility 1: we do loop versioning:
1386 if (p is aligned) {
1387 for (i=0; i<N; i++){ # loop 1A
1388 x = q[i]; # DR_MISALIGNMENT(q) = 3
1389 p[i] = y; # DR_MISALIGNMENT(p) = 0
1392 else {
1393 for (i=0; i<N; i++){ # loop 1B
1394 x = q[i]; # DR_MISALIGNMENT(q) = 3
1395 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1399 -- Possibility 2: we do loop peeling:
1400 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1401 x = q[i];
1402 p[i] = y;
1404 for (i = 3; i < N; i++){ # loop 2A
1405 x = q[i]; # DR_MISALIGNMENT(q) = 0
1406 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1409 -- Possibility 3: combination of loop peeling and versioning:
1410 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1411 x = q[i];
1412 p[i] = y;
1414 if (p is aligned) {
1415 for (i = 3; i<N; i++){ # loop 3A
1416 x = q[i]; # DR_MISALIGNMENT(q) = 0
1417 p[i] = y; # DR_MISALIGNMENT(p) = 0
1420 else {
1421 for (i = 3; i<N; i++){ # loop 3B
1422 x = q[i]; # DR_MISALIGNMENT(q) = 0
1423 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1427 These loops are later passed to loop_transform to be vectorized. The
1428 vectorizer will use the alignment information to guide the transformation
1429 (whether to generate regular loads/stores, or with special handling for
1430 misalignment). */
1432 bool
1433 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1435 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1436 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1437 enum dr_alignment_support supportable_dr_alignment;
1438 struct data_reference *dr0 = NULL, *first_store = NULL;
1439 struct data_reference *dr;
1440 unsigned int i, j;
1441 bool do_peeling = false;
1442 bool do_versioning = false;
1443 bool stat;
1444 gimple stmt;
1445 stmt_vec_info stmt_info;
1446 int vect_versioning_for_alias_required;
1447 unsigned int npeel = 0;
1448 bool all_misalignments_unknown = true;
1449 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1450 unsigned possible_npeel_number = 1;
1451 tree vectype;
1452 unsigned int nelements, mis, same_align_drs_max = 0;
1454 if (vect_print_dump_info (REPORT_DETAILS))
1455 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1457 /* While cost model enhancements are expected in the future, the high level
1458 view of the code at this time is as follows:
1460 A) If there is a misaligned access then see if peeling to align
1461 this access can make all data references satisfy
1462 vect_supportable_dr_alignment. If so, update data structures
1463 as needed and return true.
1465 B) If peeling wasn't possible and there is a data reference with an
1466 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1467 then see if loop versioning checks can be used to make all data
1468 references satisfy vect_supportable_dr_alignment. If so, update
1469 data structures as needed and return true.
1471 C) If neither peeling nor versioning were successful then return false if
1472 any data reference does not satisfy vect_supportable_dr_alignment.
1474 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1476 Note, Possibility 3 above (which is peeling and versioning together) is not
1477 being done at this time. */
1479 /* (1) Peeling to force alignment. */
1481 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1482 Considerations:
1483 + How many accesses will become aligned due to the peeling
1484 - How many accesses will become unaligned due to the peeling,
1485 and the cost of misaligned accesses.
1486 - The cost of peeling (the extra runtime checks, the increase
1487 in code size). */
1489 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1491 stmt = DR_STMT (dr);
1492 stmt_info = vinfo_for_stmt (stmt);
1494 /* For interleaving, only the alignment of the first access
1495 matters. */
1496 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1497 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1498 continue;
1500 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1501 do_peeling = vector_alignment_reachable_p (dr);
1502 if (do_peeling)
1504 if (known_alignment_for_access_p (dr))
1506 unsigned int npeel_tmp;
1507 bool negative = tree_int_cst_compare (DR_STEP (dr),
1508 size_zero_node) < 0;
1510 /* Save info about DR in the hash table. */
1511 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1512 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1513 htab_create (1, vect_peeling_hash,
1514 vect_peeling_hash_eq, free);
1516 vectype = STMT_VINFO_VECTYPE (stmt_info);
1517 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1518 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1519 TREE_TYPE (DR_REF (dr))));
1520 npeel_tmp = (negative
1521 ? (mis - nelements) : (nelements - mis))
1522 & (nelements - 1);
1524 /* For multiple types, it is possible that the bigger type access
1525 will have more than one peeling option. E.g., a loop with two
1526 types: one of size (vector size / 4), and the other one of
1527 size (vector size / 8). The vectorization factor will be 8. If both
1528 accesses are misaligned by 3, the first one needs one scalar
1529 iteration to be aligned, and the second one needs 5. But
1530 the first one will also be aligned by peeling 5 scalar
1531 iterations, and in that case both accesses will be aligned.
1532 Hence, except for the immediate peeling amount, we also want
1533 to try to add full vector size, while we don't exceed
1534 vectorization factor.
1535 We do this automatically for the cost model, since we calculate the cost
1536 for every peeling option. */
1537 if (!flag_vect_cost_model)
1538 possible_npeel_number = vf /nelements;
1540 /* Handle the aligned case. We may decide to align some other
1541 access, making DR unaligned. */
1542 if (DR_MISALIGNMENT (dr) == 0)
1544 npeel_tmp = 0;
1545 if (!flag_vect_cost_model)
1546 possible_npeel_number++;
1549 for (j = 0; j < possible_npeel_number; j++)
1551 gcc_assert (npeel_tmp <= vf);
1552 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1553 npeel_tmp += nelements;
1556 all_misalignments_unknown = false;
1557 /* Data-ref that was chosen for the case that all the
1558 misalignments are unknown is not relevant anymore, since we
1559 have a data-ref with known alignment. */
1560 dr0 = NULL;
1562 else
1564 /* If we don't know all the misalignment values, we prefer
1565 peeling for data-ref that has maximum number of data-refs
1566 with the same alignment, unless the target prefers to align
1567 stores over loads. */
1568 if (all_misalignments_unknown)
1570 if (same_align_drs_max < VEC_length (dr_p,
1571 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1572 || !dr0)
1574 same_align_drs_max = VEC_length (dr_p,
1575 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1576 dr0 = dr;
1579 if (!first_store && DR_IS_WRITE (dr))
1580 first_store = dr;
1583 /* If there are both known and unknown misaligned accesses in the
1584 loop, we choose peeling amount according to the known
1585 accesses. */
1588 if (!supportable_dr_alignment)
1590 dr0 = dr;
1591 if (!first_store && DR_IS_WRITE (dr))
1592 first_store = dr;
1596 else
1598 if (!aligned_access_p (dr))
1600 if (vect_print_dump_info (REPORT_DETAILS))
1601 fprintf (vect_dump, "vector alignment may not be reachable");
1603 break;
1608 vect_versioning_for_alias_required
1609 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1611 /* Temporarily, if versioning for alias is required, we disable peeling
1612 until we support peeling and versioning. Often peeling for alignment
1613 will require peeling for loop-bound, which in turn requires that we
1614 know how to adjust the loop ivs after the loop. */
1615 if (vect_versioning_for_alias_required
1616 || !vect_can_advance_ivs_p (loop_vinfo)
1617 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1618 do_peeling = false;
1620 if (do_peeling && all_misalignments_unknown
1621 && vect_supportable_dr_alignment (dr0, false))
1624 /* Check if the target prefers to align stores over loads, i.e., if
1625 misaligned stores are more expensive than misaligned loads (taking
1626 drs with same alignment into account). */
1627 if (first_store && DR_IS_READ (dr0))
1629 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1630 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1631 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1632 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1634 vect_get_data_access_cost (dr0, &load_inside_cost,
1635 &load_outside_cost);
1636 vect_get_data_access_cost (first_store, &store_inside_cost,
1637 &store_outside_cost);
1639 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1640 aligning the load DR0). */
1641 load_inside_penalty = store_inside_cost;
1642 load_outside_penalty = store_outside_cost;
1643 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1644 (vinfo_for_stmt (DR_STMT (first_store))),
1645 i, dr);
1646 i++)
1647 if (DR_IS_READ (dr))
1649 load_inside_penalty += load_inside_cost;
1650 load_outside_penalty += load_outside_cost;
1652 else
1654 load_inside_penalty += store_inside_cost;
1655 load_outside_penalty += store_outside_cost;
1658 /* Calculate the penalty for leaving DR0 unaligned (by
1659 aligning the FIRST_STORE). */
1660 store_inside_penalty = load_inside_cost;
1661 store_outside_penalty = load_outside_cost;
1662 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1663 (vinfo_for_stmt (DR_STMT (dr0))),
1664 i, dr);
1665 i++)
1666 if (DR_IS_READ (dr))
1668 store_inside_penalty += load_inside_cost;
1669 store_outside_penalty += load_outside_cost;
1671 else
1673 store_inside_penalty += store_inside_cost;
1674 store_outside_penalty += store_outside_cost;
1677 if (load_inside_penalty > store_inside_penalty
1678 || (load_inside_penalty == store_inside_penalty
1679 && load_outside_penalty > store_outside_penalty))
1680 dr0 = first_store;
1683 /* In case there are only loads with different unknown misalignments, use
1684 peeling only if it may help to align other accesses in the loop. */
1685 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1686 (vinfo_for_stmt (DR_STMT (dr0))))
1687 && vect_supportable_dr_alignment (dr0, false)
1688 != dr_unaligned_supported)
1689 do_peeling = false;
1692 if (do_peeling && !dr0)
1694 /* Peeling is possible, but there is no data access that is not supported
1695 unless aligned. So we try to choose the best possible peeling. */
1697 /* We should get here only if there are drs with known misalignment. */
1698 gcc_assert (!all_misalignments_unknown);
1700 /* Choose the best peeling from the hash table. */
1701 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1702 if (!dr0 || !npeel)
1703 do_peeling = false;
1706 if (do_peeling)
1708 stmt = DR_STMT (dr0);
1709 stmt_info = vinfo_for_stmt (stmt);
1710 vectype = STMT_VINFO_VECTYPE (stmt_info);
1711 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1713 if (known_alignment_for_access_p (dr0))
1715 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1716 size_zero_node) < 0;
1717 if (!npeel)
1719 /* Since it's known at compile time, compute the number of
1720 iterations in the peeled loop (the peeling factor) for use in
1721 updating DR_MISALIGNMENT values. The peeling factor is the
1722 vectorization factor minus the misalignment as an element
1723 count. */
1724 mis = DR_MISALIGNMENT (dr0);
1725 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1726 npeel = ((negative ? mis - nelements : nelements - mis)
1727 & (nelements - 1));
1730 /* For interleaved data access every iteration accesses all the
1731 members of the group, therefore we divide the number of iterations
1732 by the group size. */
1733 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1734 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1735 npeel /= GROUP_SIZE (stmt_info);
1737 if (vect_print_dump_info (REPORT_DETAILS))
1738 fprintf (vect_dump, "Try peeling by %d", npeel);
1741 /* Ensure that all data refs can be vectorized after the peel. */
1742 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1744 int save_misalignment;
1746 if (dr == dr0)
1747 continue;
1749 stmt = DR_STMT (dr);
1750 stmt_info = vinfo_for_stmt (stmt);
1751 /* For interleaving, only the alignment of the first access
1752 matters. */
1753 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1754 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1755 continue;
1757 save_misalignment = DR_MISALIGNMENT (dr);
1758 vect_update_misalignment_for_peel (dr, dr0, npeel);
1759 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1760 SET_DR_MISALIGNMENT (dr, save_misalignment);
1762 if (!supportable_dr_alignment)
1764 do_peeling = false;
1765 break;
1769 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1771 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1772 if (!stat)
1773 do_peeling = false;
1774 else
1775 return stat;
1778 if (do_peeling)
1780 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1781 If the misalignment of DR_i is identical to that of dr0 then set
1782 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1783 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1784 by the peeling factor times the element size of DR_i (MOD the
1785 vectorization factor times the size). Otherwise, the
1786 misalignment of DR_i must be set to unknown. */
1787 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1788 if (dr != dr0)
1789 vect_update_misalignment_for_peel (dr, dr0, npeel);
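      /* Worked example (illustrative numbers only): with 4-byte elements,
         a vectorization factor of 4 (16-byte vectors) and NPEEL == 2, a
         DR_i whose known misalignment was 4 bytes ends up with
         (4 + 2*4) % 16 == 12 bytes of misalignment after the peel.  */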
1791 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1792 if (npeel)
1793 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1794 else
1795 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1796 SET_DR_MISALIGNMENT (dr0, 0);
1797 if (vect_print_dump_info (REPORT_ALIGNMENT))
1798 fprintf (vect_dump, "Alignment of access forced using peeling.");
1800 if (vect_print_dump_info (REPORT_DETAILS))
1801 fprintf (vect_dump, "Peeling for alignment will be applied.");
1803 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1804 gcc_assert (stat);
1805 return stat;
1810 /* (2) Versioning to force alignment. */
1812 /* Try versioning if:
1813 1) flag_tree_vect_loop_version is TRUE
1814 2) optimize loop for speed
1815 3) there is at least one unsupported misaligned data ref with an unknown
1816 misalignment, and
1817 4) all misaligned data refs with a known misalignment are supported, and
1818 5) the number of runtime alignment checks is within reason. */
1820 do_versioning =
1821 flag_tree_vect_loop_version
1822 && optimize_loop_nest_for_speed_p (loop)
1823 && (!loop->inner); /* FORNOW */
1825 if (do_versioning)
1827 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1829 stmt = DR_STMT (dr);
1830 stmt_info = vinfo_for_stmt (stmt);
1832 /* For interleaving, only the alignment of the first access
1833 matters. */
1834 if (aligned_access_p (dr)
1835 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1836 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1837 continue;
1839 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1841 if (!supportable_dr_alignment)
1843 gimple stmt;
1844 int mask;
1845 tree vectype;
1847 if (known_alignment_for_access_p (dr)
1848 || VEC_length (gimple,
1849 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1850 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1852 do_versioning = false;
1853 break;
1856 stmt = DR_STMT (dr);
1857 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1858 gcc_assert (vectype);
1860 /* The rightmost bits of an aligned address must be zeros.
1861 Construct the mask needed for this test. For example,
1862 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1863 mask must be 15 = 0xf. */
1864 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
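              /* For illustration only: at run time the versioning guard
                 essentially checks that OR-ing the addresses of all the
                 potentially misaligned references leaves the masked bits
                 clear, e.g. for V4SI (mask == 0xf):
                   if (((addr_1 | addr_2 | ...) & 0xf) == 0)
                     ... vectorized loop ...
                   else
                     ... scalar loop ...
                 The actual guard is emitted later, when the loop is
                 versioned.  */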
1866 /* FORNOW: use the same mask to test all potentially unaligned
1867 references in the loop. The vectorizer currently supports
1868 a single vector size, see the reference to
1869 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1870 vectorization factor is computed. */
1871 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1872 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1873 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1874 VEC_safe_push (gimple, heap,
1875 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1876 DR_STMT (dr));
1880 /* Versioning requires at least one misaligned data reference. */
1881 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1882 do_versioning = false;
1883 else if (!do_versioning)
1884 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1887 if (do_versioning)
1889 VEC(gimple,heap) *may_misalign_stmts
1890 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1891 gimple stmt;
1893 /* It can now be assumed that the data references in the statements
1894 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1895 of the loop being vectorized. */
1896 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1898 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1899 dr = STMT_VINFO_DATA_REF (stmt_info);
1900 SET_DR_MISALIGNMENT (dr, 0);
1901 if (vect_print_dump_info (REPORT_ALIGNMENT))
1902 fprintf (vect_dump, "Alignment of access forced using versioning.");
1905 if (vect_print_dump_info (REPORT_DETAILS))
1906 fprintf (vect_dump, "Versioning for alignment will be applied.");
1908 /* Peeling and versioning can't be done together at this time. */
1909 gcc_assert (! (do_peeling && do_versioning));
1911 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1912 gcc_assert (stat);
1913 return stat;
1916 /* This point is reached if neither peeling nor versioning is being done. */
1917 gcc_assert (! (do_peeling || do_versioning));
1919 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1920 return stat;
1924 /* Function vect_find_same_alignment_drs.
1926 Update group and alignment relations according to the chosen
1927 vectorization factor. */
1929 static void
1930 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1931 loop_vec_info loop_vinfo)
1933 unsigned int i;
1934 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1935 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1936 struct data_reference *dra = DDR_A (ddr);
1937 struct data_reference *drb = DDR_B (ddr);
1938 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1939 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1940 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1941 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1942 lambda_vector dist_v;
1943 unsigned int loop_depth;
1945 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1946 return;
1948 if (dra == drb)
1949 return;
1951 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1952 return;
1954 /* Loop-based vectorization and known data dependence. */
1955 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1956 return;
1958 /* Data-dependence analysis reports a distance vector of zero
1959 for data-references that overlap only in the first iteration
1960 but have steps of different sign (see PR45764).
1961 So as a sanity check require equal DR_STEP. */
1962 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1963 return;
1965 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1966 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1968 int dist = dist_v[loop_depth];
1970 if (vect_print_dump_info (REPORT_DR_DETAILS))
1971 fprintf (vect_dump, "dependence distance = %d.", dist);
1973 /* Same loop iteration. */
1974 if (dist == 0
1975 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1977 /* The two references have the same alignment: the distance is zero or a multiple of the vectorization factor with equal element sizes. */
1978 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1979 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
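          /* Illustrative example: with a vectorization factor of 4 and
             4-byte elements, the accesses a[i] and a[i+4] have dependence
             distance 4; since 4 % 4 == 0 and the element sizes match, the
             two references are recorded as having the same alignment.  */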
1980 if (vect_print_dump_info (REPORT_ALIGNMENT))
1981 fprintf (vect_dump, "accesses have the same alignment.");
1982 if (vect_print_dump_info (REPORT_DR_DETAILS))
1984 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1985 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1986 fprintf (vect_dump, " and ");
1987 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1994 /* Function vect_analyze_data_refs_alignment
1996 Analyze the alignment of the data-references in the loop.
1997 Return FALSE if a data reference is found that cannot be vectorized. */
1999 bool
2000 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2001 bb_vec_info bb_vinfo)
2003 if (vect_print_dump_info (REPORT_DETAILS))
2004 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2006 /* Mark groups of data references with the same alignment using
2007 data dependence information. */
2008 if (loop_vinfo)
2010 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2011 struct data_dependence_relation *ddr;
2012 unsigned int i;
2014 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2015 vect_find_same_alignment_drs (ddr, loop_vinfo);
2018 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2020 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2021 fprintf (vect_dump,
2022 "not vectorized: can't calculate alignment for data ref.");
2023 return false;
2026 return true;
2030 /* Analyze groups of strided accesses: check that DR belongs to a group of
2031 strided accesses of legal size, step, etc. Detect gaps, single element
2032 interleaving, and other special cases. Set strided access info.
2033 Collect groups of strided stores for further use in SLP analysis. */
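/* Illustrative example (not from the original sources): assuming the two
   loads in

     for (i = 0; i < n; i++)
       sum += a[4*i] + a[4*i+1];

   have been linked into one interleaving chain, both have step
   4 * sizeof (*a), so STRIDE == 4 and COUNT == 2; the first member gets
   GROUP_GAP == 4 - 2 == 2, and the trailing gap makes the loop require a
   scalar epilogue (LOOP_VINFO_PEELING_FOR_GAPS).  */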
2035 static bool
2036 vect_analyze_group_access (struct data_reference *dr)
2038 tree step = DR_STEP (dr);
2039 tree scalar_type = TREE_TYPE (DR_REF (dr));
2040 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2041 gimple stmt = DR_STMT (dr);
2042 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2043 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2044 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2045 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2046 HOST_WIDE_INT stride, last_accessed_element = 1;
2047 bool slp_impossible = false;
2049 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2050 interleaving group (including gaps). */
2051 stride = dr_step / type_size;
2053 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2054 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2056 /* Check whether this DR is part of an interleaving group, and is a single
2057 element of the group that is accessed in the loop. */
2059 /* Gaps are supported only for loads. STEP must be a multiple of the type
2060 size. The size of the group must be a power of 2. */
2061 if (DR_IS_READ (dr)
2062 && (dr_step % type_size) == 0
2063 && stride > 0
2064 && exact_log2 (stride) != -1)
2066 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2067 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
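          /* Illustrative example: in
               for (i = 0; i < n; i++)
                 sum += a[2*i];
             the load has step 2 * sizeof (*a), so STRIDE == 2, and the
             stmt forms a single-element interleaving group of size 2
             with a gap of one element per iteration.  */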
2068 if (vect_print_dump_info (REPORT_DR_DETAILS))
2070 fprintf (vect_dump, "Detected single element interleaving ");
2071 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2072 fprintf (vect_dump, " step ");
2073 print_generic_expr (vect_dump, step, TDF_SLIM);
2076 if (loop_vinfo)
2078 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2080 if (vect_print_dump_info (REPORT_DETAILS))
2081 fprintf (vect_dump, "Data access with gaps requires scalar "
2082 "epilogue loop");
2085 return true;
2088 if (vect_print_dump_info (REPORT_DETAILS))
2090 fprintf (vect_dump, "not consecutive access ");
2091 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2094 if (bb_vinfo)
2096 /* Mark the statement as unvectorizable. */
2097 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2098 return true;
2101 return false;
2104 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2106 /* First stmt in the interleaving chain. Check the chain. */
2107 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2108 struct data_reference *data_ref = dr;
2109 unsigned int count = 1;
2110 tree next_step;
2111 tree prev_init = DR_INIT (data_ref);
2112 gimple prev = stmt;
2113 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2115 while (next)
2117 /* Skip identical data-refs. If two or more stmts share a
2118 data-ref (supported only for loads), we vectorize only the first
2119 stmt, and the rest get their vectorized loads from the first
2120 one. */
2121 if (!tree_int_cst_compare (DR_INIT (data_ref),
2122 DR_INIT (STMT_VINFO_DATA_REF (
2123 vinfo_for_stmt (next)))))
2125 if (DR_IS_WRITE (data_ref))
2127 if (vect_print_dump_info (REPORT_DETAILS))
2128 fprintf (vect_dump, "Two store stmts share the same dr.");
2129 return false;
2132 /* Check that there are no load-store dependencies for these loads,
2133 to prevent a load-store-load sequence to the same location. */
2134 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2135 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2137 if (vect_print_dump_info (REPORT_DETAILS))
2138 fprintf (vect_dump,
2139 "READ_WRITE dependence in interleaving.");
2140 return false;
2143 /* For loads, reuse the data-ref load of the earlier stmt. */
2144 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2146 prev = next;
2147 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2148 continue;
2151 prev = next;
2153 /* Check that all the accesses have the same STEP. */
2154 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2155 if (tree_int_cst_compare (step, next_step))
2157 if (vect_print_dump_info (REPORT_DETAILS))
2158 fprintf (vect_dump, "not consecutive access in interleaving");
2159 return false;
2162 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2163 /* Check that the distance between two accesses is equal to the type
2164 size. Otherwise, we have gaps. */
2165 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2166 - TREE_INT_CST_LOW (prev_init)) / type_size;
2167 if (diff != 1)
2169 /* FORNOW: SLP of accesses with gaps is not supported. */
2170 slp_impossible = true;
2171 if (DR_IS_WRITE (data_ref))
2173 if (vect_print_dump_info (REPORT_DETAILS))
2174 fprintf (vect_dump, "interleaved store with gaps");
2175 return false;
2178 gaps += diff - 1;
2181 last_accessed_element += diff;
2183 /* Store the gap from the previous member of the group. If there is no
2184 gap in the access, GROUP_GAP is always 1. */
2185 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2187 prev_init = DR_INIT (data_ref);
2188 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2189 /* Count the number of data-refs in the chain. */
2190 count++;
2193 /* COUNT is the number of accesses found; we multiply it by the size of
2194 the type to get COUNT_IN_BYTES. */
2195 count_in_bytes = type_size * count;
2197 /* Check that the size of the interleaving (including gaps) is not
2198 greater than STEP. */
2199 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2201 if (vect_print_dump_info (REPORT_DETAILS))
2203 fprintf (vect_dump, "interleaving size is greater than step for ");
2204 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2206 return false;
2209 /* Check that the size of the interleaving is equal to STEP for stores,
2210 i.e., that there are no gaps. */
2211 if (dr_step && dr_step != count_in_bytes)
2213 if (DR_IS_READ (dr))
2215 slp_impossible = true;
2216 /* There is a gap after the last load in the group. This gap is the
2217 difference between the stride and the number of elements. When
2218 there is no gap, this difference should be 0. */
2219 GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2221 else
2223 if (vect_print_dump_info (REPORT_DETAILS))
2224 fprintf (vect_dump, "interleaved store with gaps");
2225 return false;
2229 /* Check that STEP is a multiple of type size. */
2230 if (dr_step && (dr_step % type_size) != 0)
2232 if (vect_print_dump_info (REPORT_DETAILS))
2234 fprintf (vect_dump, "step is not a multiple of type size: step ");
2235 print_generic_expr (vect_dump, step, TDF_SLIM);
2236 fprintf (vect_dump, " size ");
2237 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2238 TDF_SLIM);
2240 return false;
2243 if (stride == 0)
2244 stride = count;
2246 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2247 if (vect_print_dump_info (REPORT_DETAILS))
2248 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2250 /* SLP: create an SLP data structure for every interleaving group of
2251 stores for further analysis in vect_analyze_slp. */
2252 if (DR_IS_WRITE (dr) && !slp_impossible)
2254 if (loop_vinfo)
2255 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2256 stmt);
2257 if (bb_vinfo)
2258 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2259 stmt);
2262 /* There is a gap at the end of the group. */
2263 if (stride - last_accessed_element > 0 && loop_vinfo)
2265 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2266 if (vect_print_dump_info (REPORT_DETAILS))
2267 fprintf (vect_dump, "Data access with gaps requires scalar "
2268 "epilogue loop");
2272 return true;
2276 /* Analyze the access pattern of the data-reference DR.
2277 In case of non-consecutive accesses call vect_analyze_group_access() to
2278 analyze groups of strided accesses. */
2280 static bool
2281 vect_analyze_data_ref_access (struct data_reference *dr)
2283 tree step = DR_STEP (dr);
2284 tree scalar_type = TREE_TYPE (DR_REF (dr));
2285 gimple stmt = DR_STMT (dr);
2286 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2287 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2288 struct loop *loop = NULL;
2289 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2291 if (loop_vinfo)
2292 loop = LOOP_VINFO_LOOP (loop_vinfo);
2294 if (loop_vinfo && !step)
2296 if (vect_print_dump_info (REPORT_DETAILS))
2297 fprintf (vect_dump, "bad data-ref access in loop");
2298 return false;
2301 /* Don't allow invariant accesses in loops. */
2302 if (loop_vinfo && dr_step == 0)
2303 return false;
2305 if (loop && nested_in_vect_loop_p (loop, stmt))
2307 /* Interleaved accesses are not yet supported within outer-loop
2308 vectorization for references in the inner-loop. */
2309 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2311 /* For the rest of the analysis we use the outer-loop step. */
2312 step = STMT_VINFO_DR_STEP (stmt_info);
2313 dr_step = TREE_INT_CST_LOW (step);
2315 if (dr_step == 0)
2317 if (vect_print_dump_info (REPORT_ALIGNMENT))
2318 fprintf (vect_dump, "zero step in outer loop.");
2319 if (DR_IS_READ (dr))
2320 return true;
2321 else
2322 return false;
2326 /* Consecutive? */
2327 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2328 || (dr_step < 0
2329 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2331 /* Mark that it is not interleaving. */
2332 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2333 return true;
2336 if (loop && nested_in_vect_loop_p (loop, stmt))
2338 if (vect_print_dump_info (REPORT_ALIGNMENT))
2339 fprintf (vect_dump, "strided access in outer loop.");
2340 return false;
2343 /* Not a consecutive access - check whether it is part of an interleaving group. */
2344 return vect_analyze_group_access (dr);
2348 /* Function vect_analyze_data_ref_accesses.
2350 Analyze the access pattern of all the data references in the loop.
2352 FORNOW: the only access pattern that is considered vectorizable is a
2353 simple step 1 (consecutive) access.
2355 FORNOW: handle only arrays and pointer accesses. */
2357 bool
2358 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2360 unsigned int i;
2361 VEC (data_reference_p, heap) *datarefs;
2362 struct data_reference *dr;
2364 if (vect_print_dump_info (REPORT_DETAILS))
2365 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2367 if (loop_vinfo)
2368 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2369 else
2370 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2372 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2373 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2374 && !vect_analyze_data_ref_access (dr))
2376 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2377 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2379 if (bb_vinfo)
2381 /* Mark the statement as not vectorizable. */
2382 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2383 continue;
2385 else
2386 return false;
2389 return true;
2392 /* Function vect_prune_runtime_alias_test_list.
2394 Prune a list of ddrs to be tested at run-time by versioning for alias.
2395 Return FALSE if the resulting list of ddrs is longer than allowed by
2396 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2398 bool
2399 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2401 VEC (ddr_p, heap) * ddrs =
2402 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2403 unsigned i, j;
2405 if (vect_print_dump_info (REPORT_DETAILS))
2406 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2408 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2410 bool found;
2411 ddr_p ddr_i;
2413 ddr_i = VEC_index (ddr_p, ddrs, i);
2414 found = false;
2416 for (j = 0; j < i; j++)
2418 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2420 if (vect_vfa_range_equal (ddr_i, ddr_j))
2422 if (vect_print_dump_info (REPORT_DR_DETAILS))
2424 fprintf (vect_dump, "found equal ranges ");
2425 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2426 fprintf (vect_dump, ", ");
2427 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2428 fprintf (vect_dump, " and ");
2429 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2430 fprintf (vect_dump, ", ");
2431 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2433 found = true;
2434 break;
2438 if (found)
2440 VEC_ordered_remove (ddr_p, ddrs, i);
2441 continue;
2443 i++;
2446 if (VEC_length (ddr_p, ddrs) >
2447 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2449 if (vect_print_dump_info (REPORT_DR_DETAILS))
2451 fprintf (vect_dump,
2452 "disable versioning for alias - max number of generated "
2453 "checks exceeded.");
2456 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2458 return false;
2461 return true;
2465 /* Function vect_analyze_data_refs.
2467 Find all the data references in the loop or basic block.
2469 The general structure of the analysis of data refs in the vectorizer is as
2470 follows:
2471 1- vect_analyze_data_refs(loop/bb): call
2472 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2473 in the loop/bb and their dependences.
2474 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2475 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2476 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2480 bool
2481 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2482 bb_vec_info bb_vinfo,
2483 int *min_vf)
2485 struct loop *loop = NULL;
2486 basic_block bb = NULL;
2487 unsigned int i;
2488 VEC (data_reference_p, heap) *datarefs;
2489 struct data_reference *dr;
2490 tree scalar_type;
2491 bool res;
2493 if (vect_print_dump_info (REPORT_DETAILS))
2494 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2496 if (loop_vinfo)
2498 loop = LOOP_VINFO_LOOP (loop_vinfo);
2499 res = compute_data_dependences_for_loop
2500 (loop, true,
2501 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2502 &LOOP_VINFO_DATAREFS (loop_vinfo),
2503 &LOOP_VINFO_DDRS (loop_vinfo));
2505 if (!res)
2507 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2508 fprintf (vect_dump, "not vectorized: loop contains function calls"
2509 " or data references that cannot be analyzed");
2510 return false;
2513 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2515 else
2517 bb = BB_VINFO_BB (bb_vinfo);
2518 res = compute_data_dependences_for_bb (bb, true,
2519 &BB_VINFO_DATAREFS (bb_vinfo),
2520 &BB_VINFO_DDRS (bb_vinfo));
2521 if (!res)
2523 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2524 fprintf (vect_dump, "not vectorized: basic block contains function"
2525 " calls or data references that cannot be analyzed");
2526 return false;
2529 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2532 /* Go through the data-refs, check that the analysis succeeded. Update
2533 pointer from stmt_vec_info struct to DR and vectype. */
2535 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2537 gimple stmt;
2538 stmt_vec_info stmt_info;
2539 tree base, offset, init;
2540 int vf;
2542 if (!dr || !DR_REF (dr))
2544 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2545 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2546 return false;
2549 stmt = DR_STMT (dr);
2550 stmt_info = vinfo_for_stmt (stmt);
2552 /* Check that analysis of the data-ref succeeded. */
2553 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2554 || !DR_STEP (dr))
2556 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2558 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2559 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2562 if (bb_vinfo)
2564 /* Mark the statement as not vectorizable. */
2565 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2566 continue;
2568 else
2569 return false;
2572 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2574 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2575 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2576 "constant");
2577 if (bb_vinfo)
2579 /* Mark the statement as not vectorizable. */
2580 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2581 continue;
2583 else
2584 return false;
2587 if (TREE_THIS_VOLATILE (DR_REF (dr)))
2589 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2591 fprintf (vect_dump, "not vectorized: volatile type ");
2592 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2594 return false;
2597 base = unshare_expr (DR_BASE_ADDRESS (dr));
2598 offset = unshare_expr (DR_OFFSET (dr));
2599 init = unshare_expr (DR_INIT (dr));
2601 if (stmt_can_throw_internal (stmt))
2603 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2605 fprintf (vect_dump, "not vectorized: statement can throw an "
2606 "exception ");
2607 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2609 return false;
2612 /* Update DR field in stmt_vec_info struct. */
2614 /* If the dataref is in an inner-loop of the loop that is considered for
2615 vectorization, we also want to analyze the access relative to
2616 the outer-loop (DR contains information only relative to the
2617 inner-most enclosing loop). We do that by building a reference to the
2618 first location accessed by the inner-loop, and analyze it relative to
2619 the outer-loop. */
2620 if (loop && nested_in_vect_loop_p (loop, stmt))
2622 tree outer_step, outer_base, outer_init;
2623 HOST_WIDE_INT pbitsize, pbitpos;
2624 tree poffset;
2625 enum machine_mode pmode;
2626 int punsignedp, pvolatilep;
2627 affine_iv base_iv, offset_iv;
2628 tree dinit;
2630 /* Build a reference to the first location accessed by the
2631 inner-loop: *(BASE+INIT). (The first location is actually
2632 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2633 tree inner_base = build_fold_indirect_ref
2634 (fold_build2 (POINTER_PLUS_EXPR,
2635 TREE_TYPE (base), base,
2636 fold_convert (sizetype, init)));
2638 if (vect_print_dump_info (REPORT_DETAILS))
2640 fprintf (vect_dump, "analyze in outer-loop: ");
2641 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2644 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2645 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2646 gcc_assert (outer_base != NULL_TREE);
2648 if (pbitpos % BITS_PER_UNIT != 0)
2650 if (vect_print_dump_info (REPORT_DETAILS))
2651 fprintf (vect_dump, "failed: bit offset alignment.\n");
2652 return false;
2655 outer_base = build_fold_addr_expr (outer_base);
2656 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2657 &base_iv, false))
2659 if (vect_print_dump_info (REPORT_DETAILS))
2660 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2661 return false;
2664 if (offset)
2666 if (poffset)
2667 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2668 poffset);
2669 else
2670 poffset = offset;
2673 if (!poffset)
2675 offset_iv.base = ssize_int (0);
2676 offset_iv.step = ssize_int (0);
2678 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2679 &offset_iv, false))
2681 if (vect_print_dump_info (REPORT_DETAILS))
2682 fprintf (vect_dump, "evolution of offset is not affine.\n");
2683 return false;
2686 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2687 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2688 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2689 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2690 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2692 outer_step = size_binop (PLUS_EXPR,
2693 fold_convert (ssizetype, base_iv.step),
2694 fold_convert (ssizetype, offset_iv.step));
2696 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2697 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2698 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2699 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2700 STMT_VINFO_DR_OFFSET (stmt_info) =
2701 fold_convert (ssizetype, offset_iv.base);
2702 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2703 size_int (highest_pow2_factor (offset_iv.base));
2705 if (vect_print_dump_info (REPORT_DETAILS))
2707 fprintf (vect_dump, "\touter base_address: ");
2708 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2709 fprintf (vect_dump, "\n\touter offset from base address: ");
2710 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2711 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2712 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2713 fprintf (vect_dump, "\n\touter step: ");
2714 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2715 fprintf (vect_dump, "\n\touter aligned to: ");
2716 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2720 if (STMT_VINFO_DATA_REF (stmt_info))
2722 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2724 fprintf (vect_dump,
2725 "not vectorized: more than one data ref in stmt: ");
2726 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2728 return false;
2731 STMT_VINFO_DATA_REF (stmt_info) = dr;
2733 /* Set vectype for STMT. */
2734 scalar_type = TREE_TYPE (DR_REF (dr));
2735 STMT_VINFO_VECTYPE (stmt_info) =
2736 get_vectype_for_scalar_type (scalar_type);
2737 if (!STMT_VINFO_VECTYPE (stmt_info))
2739 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2741 fprintf (vect_dump,
2742 "not vectorized: no vectype for stmt: ");
2743 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2744 fprintf (vect_dump, " scalar_type: ");
2745 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2748 if (bb_vinfo)
2750 /* Mark the statement as not vectorizable. */
2751 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2752 continue;
2754 else
2755 return false;
2758 /* Adjust the minimal vectorization factor according to the
2759 vector type. */
2760 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2761 if (vf > *min_vf)
2762 *min_vf = vf;
2765 return true;
2769 /* Function vect_get_new_vect_var.
2771 Returns a name for a new variable. The current naming scheme prepends
2772 the prefix "vect_", "vect_p" or "stmp_" (depending on the value of
2773 VAR_KIND) to NAME, if provided, to form the name of a vectorizer-
2774 generated variable. */
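/* For example (illustrative): vect_get_new_vect_var (ptr_type,
   vect_pointer_var, "a") yields a new temporary named along the lines of
   "vect_pa" (plus the usual uniquifying suffix added by create_tmp_var).  */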
2776 tree
2777 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2779 const char *prefix;
2780 tree new_vect_var;
2782 switch (var_kind)
2784 case vect_simple_var:
2785 prefix = "vect_";
2786 break;
2787 case vect_scalar_var:
2788 prefix = "stmp_";
2789 break;
2790 case vect_pointer_var:
2791 prefix = "vect_p";
2792 break;
2793 default:
2794 gcc_unreachable ();
2797 if (name)
2799 char* tmp = concat (prefix, name, NULL);
2800 new_vect_var = create_tmp_var (type, tmp);
2801 free (tmp);
2803 else
2804 new_vect_var = create_tmp_var (type, prefix);
2806 /* Mark vector typed variable as a gimple register variable. */
2807 if (TREE_CODE (type) == VECTOR_TYPE)
2808 DECL_GIMPLE_REG_P (new_vect_var) = true;
2810 return new_vect_var;
2814 /* Function vect_create_addr_base_for_vector_ref.
2816 Create an expression that computes the address of the first memory location
2817 that will be accessed for a data reference.
2819 Input:
2820 STMT: The statement containing the data reference.
2821 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2822 OFFSET: Optional. If supplied, it is added to the initial address.
2823 LOOP: Specify relative to which loop-nest should the address be computed.
2824 For example, when the dataref is in an inner-loop nested in an
2825 outer-loop that is now being vectorized, LOOP can be either the
2826 outer-loop, or the inner-loop. The first memory location accessed
2827 by the following dataref ('in' points to short):
2829 for (i=0; i<N; i++)
2830 for (j=0; j<M; j++)
2831 s += in[i+j]
2833 is as follows:
2834 if LOOP=i_loop: &in (relative to i_loop)
2835 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2837 Output:
2838 1. Return an SSA_NAME whose value is the address of the memory location of
2839 the first vector of the data reference.
2840 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2841 these statement(s) which define the returned SSA_NAME.
2843 FORNOW: We are only handling array accesses with step 1. */
2845 tree
2846 vect_create_addr_base_for_vector_ref (gimple stmt,
2847 gimple_seq *new_stmt_list,
2848 tree offset,
2849 struct loop *loop)
2851 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2852 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2853 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2854 tree base_name;
2855 tree data_ref_base_var;
2856 tree vec_stmt;
2857 tree addr_base, addr_expr;
2858 tree dest;
2859 gimple_seq seq = NULL;
2860 tree base_offset = unshare_expr (DR_OFFSET (dr));
2861 tree init = unshare_expr (DR_INIT (dr));
2862 tree vect_ptr_type;
2863 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2864 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2865 tree base;
2867 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2869 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2871 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2873 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2874 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2875 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2878 if (loop_vinfo)
2879 base_name = build_fold_indirect_ref (data_ref_base);
2880 else
2882 base_offset = ssize_int (0);
2883 init = ssize_int (0);
2884 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2887 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2888 add_referenced_var (data_ref_base_var);
2889 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2890 data_ref_base_var);
2891 gimple_seq_add_seq (new_stmt_list, seq);
2893 /* Create base_offset */
2894 base_offset = size_binop (PLUS_EXPR,
2895 fold_convert (sizetype, base_offset),
2896 fold_convert (sizetype, init));
2897 dest = create_tmp_var (sizetype, "base_off");
2898 add_referenced_var (dest);
2899 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2900 gimple_seq_add_seq (new_stmt_list, seq);
2902 if (offset)
2904 tree tmp = create_tmp_var (sizetype, "offset");
2906 add_referenced_var (tmp);
2907 offset = fold_build2 (MULT_EXPR, sizetype,
2908 fold_convert (sizetype, offset), step);
2909 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2910 base_offset, offset);
2911 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2912 gimple_seq_add_seq (new_stmt_list, seq);
2915 /* base + base_offset */
2916 if (loop_vinfo)
2917 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2918 data_ref_base, base_offset);
2919 else
2921 addr_base = build1 (ADDR_EXPR,
2922 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2923 unshare_expr (DR_REF (dr)));
2926 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2927 base = get_base_address (DR_REF (dr));
2928 if (base
2929 && TREE_CODE (base) == MEM_REF)
2930 vect_ptr_type
2931 = build_qualified_type (vect_ptr_type,
2932 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2934 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2935 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2936 get_name (base_name));
2937 add_referenced_var (addr_expr);
2938 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2939 gimple_seq_add_seq (new_stmt_list, seq);
2941 if (DR_PTR_INFO (dr)
2942 && TREE_CODE (vec_stmt) == SSA_NAME)
2944 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2945 if (offset)
2947 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2948 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2952 if (vect_print_dump_info (REPORT_DETAILS))
2954 fprintf (vect_dump, "created ");
2955 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2958 return vec_stmt;
2962 /* Function vect_create_data_ref_ptr.
2964 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
2965 location accessed in the loop by STMT, along with the def-use update
2966 chain to appropriately advance the pointer through the loop iterations.
2967 Also set aliasing information for the pointer. This pointer is used by
2968 the callers to this function to create a memory reference expression for
2969 vector load/store access.
2971 Input:
2972 1. STMT: a stmt that references memory. Expected to be of the form
2973 GIMPLE_ASSIGN <name, data-ref> or
2974 GIMPLE_ASSIGN <data-ref, name>.
2975 2. AGGR_TYPE: the type of the reference, which should be either a vector
2976 or an array.
2977 3. AT_LOOP: the loop where the vector memref is to be created.
2978 4. OFFSET (optional): an offset to be added to the initial address accessed
2979 by the data-ref in STMT.
2980 5. BSI: location where the new stmts are to be placed if there is no loop
2981 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
2982 pointing to the initial address.
2984 Output:
2985 1. Declare a new ptr to vector_type, and have it point to the base of the
2986 data reference (the initial address accessed by the data reference).
2987 For example, for vector of type V8HI, the following code is generated:
2989 v8hi *ap;
2990 ap = (v8hi *)initial_address;
2992 if OFFSET is not supplied:
2993 initial_address = &a[init];
2994 if OFFSET is supplied:
2995 initial_address = &a[init + OFFSET];
2997 Return the initial_address in INITIAL_ADDRESS.
2999 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3000 update the pointer in each iteration of the loop.
3002 Return the increment stmt that updates the pointer in PTR_INCR.
3004 3. Set INV_P to true if the access pattern of the data reference in the
3005 vectorized loop is invariant. Set it to false otherwise.
3007 4. Return the pointer. */
3009 tree
3010 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3011 tree offset, tree *initial_address,
3012 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3013 bool only_init, bool *inv_p)
3015 tree base_name;
3016 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3017 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3018 struct loop *loop = NULL;
3019 bool nested_in_vect_loop = false;
3020 struct loop *containing_loop = NULL;
3021 tree aggr_ptr_type;
3022 tree aggr_ptr;
3023 tree new_temp;
3024 gimple vec_stmt;
3025 gimple_seq new_stmt_list = NULL;
3026 edge pe = NULL;
3027 basic_block new_bb;
3028 tree aggr_ptr_init;
3029 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3030 tree aptr;
3031 gimple_stmt_iterator incr_gsi;
3032 bool insert_after;
3033 bool negative;
3034 tree indx_before_incr, indx_after_incr;
3035 gimple incr;
3036 tree step;
3037 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3038 tree base;
3040 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3041 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3043 if (loop_vinfo)
3045 loop = LOOP_VINFO_LOOP (loop_vinfo);
3046 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3047 containing_loop = (gimple_bb (stmt))->loop_father;
3048 pe = loop_preheader_edge (loop);
3050 else
3052 gcc_assert (bb_vinfo);
3053 only_init = true;
3054 *ptr_incr = NULL;
3057 /* Check the step (evolution) of the load in LOOP, and record
3058 whether it's invariant. */
3059 if (nested_in_vect_loop)
3060 step = STMT_VINFO_DR_STEP (stmt_info);
3061 else
3062 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3064 if (tree_int_cst_compare (step, size_zero_node) == 0)
3065 *inv_p = true;
3066 else
3067 *inv_p = false;
3068 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3070 /* Create an expression for the first address accessed by this load
3071 in LOOP. */
3072 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3074 if (vect_print_dump_info (REPORT_DETAILS))
3076 tree data_ref_base = base_name;
3077 fprintf (vect_dump, "create %s-pointer variable to type: ",
3078 tree_code_name[(int) TREE_CODE (aggr_type)]);
3079 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3080 if (TREE_CODE (data_ref_base) == VAR_DECL
3081 || TREE_CODE (data_ref_base) == ARRAY_REF)
3082 fprintf (vect_dump, " vectorizing an array ref: ");
3083 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3084 fprintf (vect_dump, " vectorizing a record based array ref: ");
3085 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3086 fprintf (vect_dump, " vectorizing a pointer ref: ");
3087 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3090 /* (1) Create the new aggregate-pointer variable. */
3091 aggr_ptr_type = build_pointer_type (aggr_type);
3092 base = get_base_address (DR_REF (dr));
3093 if (base
3094 && TREE_CODE (base) == MEM_REF)
3095 aggr_ptr_type
3096 = build_qualified_type (aggr_ptr_type,
3097 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3098 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3099 get_name (base_name));
3101 /* Vector and array types inherit the alias set of their component
3102 type by default, so we need to use a ref-all pointer if the created
3103 aggregate reference would not conflict with the original data
3104 reference (which can happen because the latter is not addressable). */
3105 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3106 get_alias_set (DR_REF (dr))))
3108 aggr_ptr_type
3109 = build_pointer_type_for_mode (aggr_type,
3110 TYPE_MODE (aggr_ptr_type), true);
3111 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3112 get_name (base_name));
3115 /* Likewise for any of the data references in the stmt group. */
3116 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3118 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3121 tree lhs = gimple_assign_lhs (orig_stmt);
3122 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3123 get_alias_set (lhs)))
3125 aggr_ptr_type
3126 = build_pointer_type_for_mode (aggr_type,
3127 TYPE_MODE (aggr_ptr_type), true);
3128 aggr_ptr
3129 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3130 get_name (base_name));
3131 break;
3134 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3136 while (orig_stmt);
3139 add_referenced_var (aggr_ptr);
3141 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3142 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3143 def-use update cycles for the pointer: one relative to the outer-loop
3144 (LOOP), which is what steps (3) and (4) below do. The other is relative
3145 to the inner-loop (which is the inner-most loop containing the dataref),
3146 and this is done by step (5) below.
3148 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3149 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3150 redundant. Steps (3),(4) create the following:
3152 vp0 = &base_addr;
3153 LOOP: vp1 = phi(vp0,vp2)
3156 vp2 = vp1 + step
3157 goto LOOP
3159 If there is an inner-loop nested in loop, then step (5) will also be
3160 applied, and an additional update in the inner-loop will be created:
3162 vp0 = &base_addr;
3163 LOOP: vp1 = phi(vp0,vp2)
3165 inner: vp3 = phi(vp1,vp4)
3166 vp4 = vp3 + inner_step
3167 if () goto inner
3169 vp2 = vp1 + step
3170 if () goto LOOP */
3172 /* (2) Calculate the initial address of the aggregate-pointer, and set
3173 the aggregate-pointer to point to it before the loop. */
3175 /* Create: &(base[init_val+offset]) in the loop preheader. */
3177 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3178 offset, loop);
3179 if (new_stmt_list)
3181 if (pe)
3183 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3184 gcc_assert (!new_bb);
3186 else
3187 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3190 *initial_address = new_temp;
3192 /* Create: p = (aggr_type *) initial_base */
3193 if (TREE_CODE (new_temp) != SSA_NAME
3194 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3196 vec_stmt = gimple_build_assign (aggr_ptr,
3197 fold_convert (aggr_ptr_type, new_temp));
3198 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3199 /* Copy the points-to information if it exists. */
3200 if (DR_PTR_INFO (dr))
3201 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3202 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3203 if (pe)
3205 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3206 gcc_assert (!new_bb);
3208 else
3209 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3211 else
3212 aggr_ptr_init = new_temp;
3214 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3215 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3216 inner-loop nested in LOOP (during outer-loop vectorization). */
3218 /* No update in loop is required. */
3219 if (only_init && (!loop_vinfo || at_loop == loop))
3220 aptr = aggr_ptr_init;
3221 else
3223 /* The step of the aggregate pointer is the type size. */
3224 tree step = TYPE_SIZE_UNIT (aggr_type);
3225 /* One exception to the above is when the scalar step of the load in
3226 LOOP is zero. In this case the step here is also zero. */
3227 if (*inv_p)
3228 step = size_zero_node;
3229 else if (negative)
3230 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3232 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3234 create_iv (aggr_ptr_init,
3235 fold_convert (aggr_ptr_type, step),
3236 aggr_ptr, loop, &incr_gsi, insert_after,
3237 &indx_before_incr, &indx_after_incr);
3238 incr = gsi_stmt (incr_gsi);
3239 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3241 /* Copy the points-to information if it exists. */
3242 if (DR_PTR_INFO (dr))
3244 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3245 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3247 if (ptr_incr)
3248 *ptr_incr = incr;
3250 aptr = indx_before_incr;
3253 if (!nested_in_vect_loop || only_init)
3254 return aptr;
3257 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3258 nested in LOOP, if one exists. */
3260 gcc_assert (nested_in_vect_loop);
3261 if (!only_init)
3263 standard_iv_increment_position (containing_loop, &incr_gsi,
3264 &insert_after);
3265 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3266 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3267 &indx_after_incr);
3268 incr = gsi_stmt (incr_gsi);
3269 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3271 /* Copy the points-to information if it exists. */
3272 if (DR_PTR_INFO (dr))
3274 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3275 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3277 if (ptr_incr)
3278 *ptr_incr = incr;
3280 return indx_before_incr;
3282 else
3283 gcc_unreachable ();
3287 /* Function bump_vector_ptr
3289 Increment a pointer (to a vector type) by vector-size. If requested,
3290 i.e. if PTR-INCR is given, then also connect the new increment stmt
3291 to the existing def-use update-chain of the pointer, by modifying
3292 the PTR_INCR as illustrated below:
3294 The pointer def-use update-chain before this function:
3295 DATAREF_PTR = phi (p_0, p_2)
3296 ....
3297 PTR_INCR: p_2 = DATAREF_PTR + step
3299 The pointer def-use update-chain after this function:
3300 DATAREF_PTR = phi (p_0, p_2)
3301 ....
3302 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3303 ....
3304 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3306 Input:
3307 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3308 in the loop.
3309 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3310 the loop. The increment amount across iterations is expected
3311 to be vector_size.
3312 BSI - location where the new update stmt is to be placed.
3313 STMT - the original scalar memory-access stmt that is being vectorized.
3314 BUMP - optional. The offset by which to bump the pointer. If not given,
3315 the offset is assumed to be vector_size.
3317 Output: Return NEW_DATAREF_PTR as illustrated above.
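   For example (illustrative): for a V8HI data reference with no explicit
   BUMP, the default bump is TYPE_SIZE_UNIT (V8HI), i.e. 16 bytes, and the
   generated stmt is simply NEW_DATAREF_PTR = DATAREF_PTR + 16.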
3321 tree
3322 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3323 gimple stmt, tree bump)
3325 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3326 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3327 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3328 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3329 tree update = TYPE_SIZE_UNIT (vectype);
3330 gimple incr_stmt;
3331 ssa_op_iter iter;
3332 use_operand_p use_p;
3333 tree new_dataref_ptr;
3335 if (bump)
3336 update = bump;
3338 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3339 dataref_ptr, update);
3340 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3341 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3342 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3344 /* Copy the points-to information if it exists. */
3345 if (DR_PTR_INFO (dr))
3347 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3348 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3349 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3352 if (!ptr_incr)
3353 return new_dataref_ptr;
3355 /* Update the vector-pointer's cross-iteration increment. */
3356 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3358 tree use = USE_FROM_PTR (use_p);
3360 if (use == dataref_ptr)
3361 SET_USE (use_p, new_dataref_ptr);
3362 else
3363 gcc_assert (tree_int_cst_compare (use, update) == 0);
3366 return new_dataref_ptr;
3370 /* Function vect_create_destination_var.
3372 Create a new temporary of type VECTYPE. */
3374 tree
3375 vect_create_destination_var (tree scalar_dest, tree vectype)
3377 tree vec_dest;
3378 const char *new_name;
3379 tree type;
3380 enum vect_var_kind kind;
3382 kind = vectype ? vect_simple_var : vect_scalar_var;
3383 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3385 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3387 new_name = get_name (scalar_dest);
3388 if (!new_name)
3389 new_name = "var_";
3390 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3391 add_referenced_var (vec_dest);
3393 return vec_dest;
3396 /* Function vect_strided_store_supported.
3398 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3399 and FALSE otherwise. */
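/* For instance (illustrative), a group of 3 interleaved stores is rejected
   here because 3 is not a power of two, regardless of the target's optabs.  */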
3401 bool
3402 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3404 optab interleave_high_optab, interleave_low_optab;
3405 enum machine_mode mode;
3407 mode = TYPE_MODE (vectype);
3409 /* vect_permute_store_chain requires the group size to be a power of two. */
3410 if (exact_log2 (count) == -1)
3412 if (vect_print_dump_info (REPORT_DETAILS))
3413 fprintf (vect_dump, "the size of the group of strided accesses"
3414 " is not a power of 2");
3415 return false;
3418 /* Check that the operation is supported. */
3419 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3420 vectype, optab_default);
3421 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3422 vectype, optab_default);
3423 if (!interleave_high_optab || !interleave_low_optab)
3425 if (vect_print_dump_info (REPORT_DETAILS))
3426 fprintf (vect_dump, "no optab for interleave.");
3427 return false;
3430 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3431 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3433 if (vect_print_dump_info (REPORT_DETAILS))
3434 fprintf (vect_dump, "interleave op not supported by target.");
3435 return false;
3438 return true;
3442 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3443 type VECTYPE. */
3445 bool
3446 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3448 return vect_lanes_optab_supported_p ("vec_store_lanes",
3449 vec_store_lanes_optab,
3450 vectype, count);
3454 /* Function vect_permute_store_chain.
3456 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3457 a power of 2, generate interleave_high/low stmts to reorder the data
3458 correctly for the stores. Return the final references for stores in
3459 RESULT_CHAIN.
3461 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3462 The input is 4 vectors each containing 8 elements. We assign a number to
3463 each element, the input sequence is:
3465 1st vec: 0 1 2 3 4 5 6 7
3466 2nd vec: 8 9 10 11 12 13 14 15
3467 3rd vec: 16 17 18 19 20 21 22 23
3468 4th vec: 24 25 26 27 28 29 30 31
3470 The output sequence should be:
3472 1st vec: 0 8 16 24 1 9 17 25
3473 2nd vec: 2 10 18 26 3 11 19 27
3474 3rd vec: 4 12 20 28 5 13 21 29
3475 4th vec: 6 14 22 30 7 15 23 31
3477 i.e., we interleave the contents of the four vectors in their order.
3479 We use interleave_high/low instructions to create such output. The input of
3480 each interleave_high/low operation is two vectors:
3481 1st vec 2nd vec
3482 0 1 2 3 4 5 6 7
3483 the even elements of the result vector are obtained left-to-right from the
3484 high/low elements of the first vector. The odd elements of the result are
3485 obtained left-to-right from the high/low elements of the second vector.
3486 The output of interleave_high will be: 0 4 1 5
3487 and of interleave_low: 2 6 3 7
3490 The permutation is done in log LENGTH stages. In each stage interleave_high
3491 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3492 where the first argument is taken from the first half of DR_CHAIN and the
3493 second argument from its second half.
3494 In our example,
3496 I1: interleave_high (1st vec, 3rd vec)
3497 I2: interleave_low (1st vec, 3rd vec)
3498 I3: interleave_high (2nd vec, 4th vec)
3499 I4: interleave_low (2nd vec, 4th vec)
3501 The output for the first stage is:
3503 I1: 0 16 1 17 2 18 3 19
3504 I2: 4 20 5 21 6 22 7 23
3505 I3: 8 24 9 25 10 26 11 27
3506 I4: 12 28 13 29 14 30 15 31
3508 The output of the second stage, i.e. the final result is:
3510 I1: 0 8 16 24 1 9 17 25
3511 I2: 2 10 18 26 3 11 19 27
3512 I3: 4 12 20 28 5 13 21 29
3513 I4: 6 14 22 30 7 15 23 31. */
3515 void
3516 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3517 unsigned int length,
3518 gimple stmt,
3519 gimple_stmt_iterator *gsi,
3520 VEC(tree,heap) **result_chain)
3522 tree perm_dest, vect1, vect2, high, low;
3523 gimple perm_stmt;
3524 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3525 int i;
3526 unsigned int j;
3527 enum tree_code high_code, low_code;
3529 gcc_assert (vect_strided_store_supported (vectype, length));
3531 *result_chain = VEC_copy (tree, heap, dr_chain);
3533 for (i = 0; i < exact_log2 (length); i++)
3535 for (j = 0; j < length/2; j++)
3537 vect1 = VEC_index (tree, dr_chain, j);
3538 vect2 = VEC_index (tree, dr_chain, j+length/2);
3540 /* Create interleaving stmt:
3541 in the case of big endian:
3542 high = interleave_high (vect1, vect2)
3543 and in the case of little endian:
3544 high = interleave_low (vect1, vect2). */
3545 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3546 DECL_GIMPLE_REG_P (perm_dest) = 1;
3547 add_referenced_var (perm_dest);
3548 if (BYTES_BIG_ENDIAN)
3550 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3551 low_code = VEC_INTERLEAVE_LOW_EXPR;
3553 else
3555 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3556 high_code = VEC_INTERLEAVE_LOW_EXPR;
3558 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3559 vect1, vect2);
3560 high = make_ssa_name (perm_dest, perm_stmt);
3561 gimple_assign_set_lhs (perm_stmt, high);
3562 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3563 VEC_replace (tree, *result_chain, 2*j, high);
3565 /* Create interleaving stmt:
3566 in the case of big endian:
3567 low = interleave_low (vect1, vect2)
3568 and in the case of little endian:
3569 low = interleave_high (vect1, vect2). */
3570 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3571 DECL_GIMPLE_REG_P (perm_dest) = 1;
3572 add_referenced_var (perm_dest);
3573 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3574 vect1, vect2);
3575 low = make_ssa_name (perm_dest, perm_stmt);
3576 gimple_assign_set_lhs (perm_stmt, low);
3577 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3578 VEC_replace (tree, *result_chain, 2*j+1, low);
3580 dr_chain = VEC_copy (tree, heap, *result_chain);
3584 /* Function vect_setup_realignment
3586 This function is called when vectorizing an unaligned load using
3587 the dr_explicit_realign[_optimized] scheme.
3588 This function generates the following code at the loop prolog:
3590 p = initial_addr;
3591 x msq_init = *(floor(p)); # prolog load
3592 realignment_token = call target_builtin;
3593 loop:
3594 x msq = phi (msq_init, ---)
3596 The stmts marked with x are generated only for the case of
3597 dr_explicit_realign_optimized.
3599 The code above sets up a new (vector) pointer, pointing to the first
3600 location accessed by STMT, and a "floor-aligned" load using that pointer.
3601 It also generates code to compute the "realignment-token" (if the relevant
3602 target hook was defined), and creates a phi-node at the loop-header bb
3603 whose arguments are the result of the prolog-load (created by this
3604 function) and the result of a load that takes place in the loop (to be
3605 created by the caller to this function).
3607 For the case of dr_explicit_realign_optimized:
3608 The caller to this function uses the phi-result (msq) to create the
3609 realignment code inside the loop, and sets up the missing phi argument,
3610 as follows:
3611 loop:
3612 msq = phi (msq_init, lsq)
3613 lsq = *(floor(p')); # load in loop
3614 result = realign_load (msq, lsq, realignment_token);
3616 For the case of dr_explicit_realign:
3617 loop:
3618 msq = *(floor(p)); # load in loop
3619 p' = p + (VS-1);
3620 lsq = *(floor(p')); # load in loop
3621 result = realign_load (msq, lsq, realignment_token);
3623 Input:
3624 STMT - (scalar) load stmt to be vectorized. This load accesses
3625 a memory location that may be unaligned.
3626 GSI - place where new code is to be inserted.
3627 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3628 is used.
3630 Output:
3631 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3632 target hook, if defined.
3633 Return value - the result of the loop-header phi node. */
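/* A minimal stand-alone sketch (illustrative only, not part of the
   vectorizer) of what the realignment scheme computes, at the byte level.
   VS and the helper name are hypothetical; the real REALIGN_LOAD is a
   target operation driven by the realignment token (e.g. a permute mask):

     #include <stdint.h>
     #include <string.h>

     #define VS 16   // vector size in bytes

     // Emulate msq = *(floor(p)), lsq = *(floor(p + VS - 1)) and the
     // selection of VS bytes starting at P's misalignment offset; OUT
     // receives the value an unaligned vector load of P would produce.
     static void
     realign_load_model (const unsigned char *p, unsigned char out[VS])
     {
       uintptr_t addr = (uintptr_t) p;
       const unsigned char *msq_p
         = (const unsigned char *) (addr & -(uintptr_t) VS);
       const unsigned char *lsq_p
         = (const unsigned char *) ((addr + VS - 1) & -(uintptr_t) VS);
       size_t off = addr & (VS - 1);       // the "realignment token"
       unsigned char msq[VS], lsq[VS];
       size_t i;

       memcpy (msq, msq_p, VS);            // first aligned load
       memcpy (lsq, lsq_p, VS);            // second aligned load
       for (i = 0; i < VS; i++)
         out[i] = i + off < VS ? msq[i + off] : lsq[i + off - VS];
     }

   In the dr_explicit_realign_optimized variant the second aligned load of
   one iteration is reused as MSQ of the next one, so only one new aligned
   load is issued per iteration.  */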
3635 tree
3636 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3637 tree *realignment_token,
3638 enum dr_alignment_support alignment_support_scheme,
3639 tree init_addr,
3640 struct loop **at_loop)
3642 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3643 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3644 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3645 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3646 struct loop *loop = NULL;
3647 edge pe = NULL;
3648 tree scalar_dest = gimple_assign_lhs (stmt);
3649 tree vec_dest;
3650 gimple inc;
3651 tree ptr;
3652 tree data_ref;
3653 gimple new_stmt;
3654 basic_block new_bb;
3655 tree msq_init = NULL_TREE;
3656 tree new_temp;
3657 gimple phi_stmt;
3658 tree msq = NULL_TREE;
3659 gimple_seq stmts = NULL;
3660 bool inv_p;
3661 bool compute_in_loop = false;
3662 bool nested_in_vect_loop = false;
3663 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3664 struct loop *loop_for_initial_load = NULL;
3666 if (loop_vinfo)
3668 loop = LOOP_VINFO_LOOP (loop_vinfo);
3669 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3672 gcc_assert (alignment_support_scheme == dr_explicit_realign
3673 || alignment_support_scheme == dr_explicit_realign_optimized);
3675 /* We need to generate three things:
3676 1. the misalignment computation
3677 2. the extra vector load (for the optimized realignment scheme).
3678 3. the phi node for the two vectors from which the realignment is
3679 done (for the optimized realignment scheme). */
3681 /* 1. Determine where to generate the misalignment computation.
3683 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3684 calculation will be generated by this function, outside the loop (in the
3685 preheader). Otherwise, INIT_ADDR has already been computed for us by the
3686 caller, inside the loop.
3688 Background: If the misalignment remains fixed throughout the iterations of
3689 the loop, then both realignment schemes are applicable, and also the
3690 misalignment computation can be done outside LOOP. This is because we are
3691 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3692 are a multiple of VS (the Vector Size), and therefore the misalignment in
3693 different vectorized LOOP iterations is always the same.
3694 The problem arises only if the memory access is in an inner-loop nested
3695 inside LOOP, which is now being vectorized using outer-loop vectorization.
3696 This is the only case when the misalignment of the memory access may not
3697 remain fixed throughout the iterations of the inner-loop (as explained in
3698 detail in vect_supportable_dr_alignment). In this case, not only is the
3699 optimized realignment scheme not applicable, but also the misalignment
3700 computation (and generation of the realignment token that is passed to
3701 REALIGN_LOAD) has to be done inside the loop.
3703 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3704 or not, which in turn determines if the misalignment is computed inside
3705 the inner-loop, or outside LOOP. */
3707 if (init_addr != NULL_TREE || !loop_vinfo)
3709 compute_in_loop = true;
3710 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3714 /* 2. Determine where to generate the extra vector load.
3716 For the optimized realignment scheme, instead of generating two vector
3717 loads in each iteration, we generate a single extra vector load in the
3718 preheader of the loop, and in each iteration reuse the result of the
3719 vector load from the previous iteration. In case the memory access is in
3720 an inner-loop nested inside LOOP, which is now being vectorized using
3721 outer-loop vectorization, we need to determine whether this initial vector
3722 load should be generated at the preheader of the inner-loop, or can be
3723 generated at the preheader of LOOP. If the memory access has no evolution
3724 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3725 to be generated inside LOOP (in the preheader of the inner-loop). */
3727 if (nested_in_vect_loop)
3729 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3730 bool invariant_in_outerloop =
3731 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3732 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3734 else
3735 loop_for_initial_load = loop;
3736 if (at_loop)
3737 *at_loop = loop_for_initial_load;
3739 if (loop_for_initial_load)
3740 pe = loop_preheader_edge (loop_for_initial_load);
3742 /* 3. For the case of the optimized realignment, create the first vector
3743 load at the loop preheader. */
3745 if (alignment_support_scheme == dr_explicit_realign_optimized)
3747 /* Create msq_init = *(floor(p1)) in the loop preheader */
3749 gcc_assert (!compute_in_loop);
3750 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3751 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
3752 NULL_TREE, &init_addr, NULL, &inc,
3753 true, &inv_p);
3754 new_stmt = gimple_build_assign_with_ops
3755 (BIT_AND_EXPR, NULL_TREE, ptr,
3756 build_int_cst (TREE_TYPE (ptr),
3757 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3758 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3759 gimple_assign_set_lhs (new_stmt, new_temp);
3760 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3761 gcc_assert (!new_bb);
3762 data_ref
3763 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3764 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3765 new_stmt = gimple_build_assign (vec_dest, data_ref);
3766 new_temp = make_ssa_name (vec_dest, new_stmt);
3767 gimple_assign_set_lhs (new_stmt, new_temp);
3768 mark_symbols_for_renaming (new_stmt);
3769 if (pe)
3771 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3772 gcc_assert (!new_bb);
3774 else
3775 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3777 msq_init = gimple_assign_lhs (new_stmt);
3780 /* 4. Create realignment token using a target builtin, if available.
3781 It is done either inside the containing loop, or before LOOP (as
3782 determined above). */
3784 if (targetm.vectorize.builtin_mask_for_load)
3786 tree builtin_decl;
3788 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3789 if (!init_addr)
3791 /* Generate the INIT_ADDR computation outside LOOP. */
3792 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3793 NULL_TREE, loop);
3794 if (loop)
3796 pe = loop_preheader_edge (loop);
3797 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3798 gcc_assert (!new_bb);
3800 else
3801 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3804 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3805 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3806 vec_dest =
3807 vect_create_destination_var (scalar_dest,
3808 gimple_call_return_type (new_stmt));
3809 new_temp = make_ssa_name (vec_dest, new_stmt);
3810 gimple_call_set_lhs (new_stmt, new_temp);
3812 if (compute_in_loop)
3813 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3814 else
3816 /* Generate the misalignment computation outside LOOP. */
3817 pe = loop_preheader_edge (loop);
3818 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3819 gcc_assert (!new_bb);
3822 *realignment_token = gimple_call_lhs (new_stmt);
3824 /* The result of the CALL_EXPR to this builtin is determined from
3825 the value of the parameter and no global variables are touched
3826 which makes the builtin a "const" function. Requiring the
3827 builtin to have the "const" attribute makes it unnecessary
3828 to call mark_call_clobbered. */
3829 gcc_assert (TREE_READONLY (builtin_decl));
3832 if (alignment_support_scheme == dr_explicit_realign)
3833 return msq;
3835 gcc_assert (!compute_in_loop);
3836 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3839 /* 5. Create msq = phi <msq_init, lsq> in loop */
3841 pe = loop_preheader_edge (containing_loop);
3842 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3843 msq = make_ssa_name (vec_dest, NULL);
3844 phi_stmt = create_phi_node (msq, containing_loop->header);
3845 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3846 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3848 return msq;
3852 /* Function vect_strided_load_supported.
3854 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3855 and FALSE otherwise. */
3857 bool
3858 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
3860 optab perm_even_optab, perm_odd_optab;
3861 enum machine_mode mode;
3863 mode = TYPE_MODE (vectype);
3865 /* vect_permute_load_chain requires the group size to be a power of two. */
3866 if (exact_log2 (count) == -1)
3868 if (vect_print_dump_info (REPORT_DETAILS))
3869 fprintf (vect_dump, "the size of the group of strided accesses"
3870 " is not a power of 2");
3871 return false;
3874 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3875 optab_default);
3876 if (!perm_even_optab)
3878 if (vect_print_dump_info (REPORT_DETAILS))
3879 fprintf (vect_dump, "no optab for perm_even.");
3880 return false;
3883 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3885 if (vect_print_dump_info (REPORT_DETAILS))
3886 fprintf (vect_dump, "perm_even op not supported by target.");
3887 return false;
3890 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3891 optab_default);
3892 if (!perm_odd_optab)
3894 if (vect_print_dump_info (REPORT_DETAILS))
3895 fprintf (vect_dump, "no optab for perm_odd.");
3896 return false;
3899 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3901 if (vect_print_dump_info (REPORT_DETAILS))
3902 fprintf (vect_dump, "perm_odd op not supported by target.");
3903 return false;
3905 return true;
3908 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
3909 type VECTYPE. */
3911 bool
3912 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3914 return vect_lanes_optab_supported_p ("vec_load_lanes",
3915 vec_load_lanes_optab,
3916 vectype, count);
3919 /* Function vect_permute_load_chain.
3921 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3922 a power of 2, generate extract_even/odd stmts to reorder the input data
3923 correctly. Return the final references for loads in RESULT_CHAIN.
3925 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3926 The input is 4 vectors each containing 8 elements. We assign a number to each
3927 element; the input sequence is:
3929 1st vec: 0 1 2 3 4 5 6 7
3930 2nd vec: 8 9 10 11 12 13 14 15
3931 3rd vec: 16 17 18 19 20 21 22 23
3932 4th vec: 24 25 26 27 28 29 30 31
3934 The output sequence should be:
3936 1st vec: 0 4 8 12 16 20 24 28
3937 2nd vec: 1 5 9 13 17 21 25 29
3938 3rd vec: 2 6 10 14 18 22 26 30
3939 4th vec: 3 7 11 15 19 23 27 31
3941 i.e., the first output vector should contain the first elements of each
3942 interleaving group, etc.
3944 We use extract_even/odd instructions to create such output. The input of
3945 each extract_even/odd operation is two vectors
3946 1st vec 2nd vec
3947 0 1 2 3 4 5 6 7
3949 and the output is the vector of extracted even/odd elements. The output of
3950 extract_even will be: 0 2 4 6
3951 and of extract_odd: 1 3 5 7
3954 The permutation is done in log LENGTH stages. In each stage extract_even
3955 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3956 their order. In our example,
3958 E1: extract_even (1st vec, 2nd vec)
3959 E2: extract_odd (1st vec, 2nd vec)
3960 E3: extract_even (3rd vec, 4th vec)
3961 E4: extract_odd (3rd vec, 4th vec)
3963 The output for the first stage will be:
3965 E1: 0 2 4 6 8 10 12 14
3966 E2: 1 3 5 7 9 11 13 15
3967 E3: 16 18 20 22 24 26 28 30
3968 E4: 17 19 21 23 25 27 29 31
3970 In order to proceed and create the correct sequence for the next stage (or
3971 for the correct output, if the second stage is the last one, as in our
3972 example), we first put the output of the extract_even operation and then the
3973 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3974 The input for the second stage is:
3976 1st vec (E1): 0 2 4 6 8 10 12 14
3977 2nd vec (E3): 16 18 20 22 24 26 28 30
3978 3rd vec (E2): 1 3 5 7 9 11 13 15
3979 4th vec (E4): 17 19 21 23 25 27 29 31
3981 The output of the second stage:
3983 E1: 0 4 8 12 16 20 24 28
3984 E2: 2 6 10 14 18 22 26 30
3985 E3: 1 5 9 13 17 21 25 29
3986 E4: 3 7 11 15 19 23 27 31
3988 And RESULT_CHAIN after reordering:
3990 1st vec (E1): 0 4 8 12 16 20 24 28
3991 2nd vec (E3): 1 5 9 13 17 21 25 29
3992 3rd vec (E2): 2 6 10 14 18 22 26 30
3993 4th vec (E4): 3 7 11 15 19 23 27 31. */
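/* A minimal stand-alone sketch (illustrative only, not part of the
   vectorizer) of the extract_even/odd permutation described above, on
   plain element indices.  Helper names and fixed sizes are hypothetical:

     #include <string.h>

     #define LEN  4             // vectors in the chain (a power of two)
     #define NELT 8             // elements per vector

     // extract_even/odd of V1 and V2: concatenate the two inputs and keep
     // the even (resp. odd) numbered elements.
     static void
     extract (const int *v1, const int *v2, int *out, int odd)
     {
       int k;
       for (k = 0; k < NELT / 2; k++)
         {
           out[k] = v1[2 * k + odd];
           out[NELT / 2 + k] = v2[2 * k + odd];
         }
     }

     // log2 (LEN) stages; the even result of pair J goes to slot J/2 and
     // the odd result to slot J/2 + LEN/2, as in vect_permute_load_chain.
     static void
     permute_load_chain_model (int chain[LEN][NELT])
     {
       int result[LEN][NELT];
       int stage, j;
       for (stage = 1; stage < LEN; stage *= 2)
         {
           for (j = 0; j < LEN; j += 2)
             {
               extract (chain[j], chain[j + 1], result[j / 2], 0);
               extract (chain[j], chain[j + 1], result[j / 2 + LEN / 2], 1);
             }
           memcpy (chain, result, sizeof (result));
         }
     }

   Running the element numbering 0 ... 31 through this model reproduces the
   stage outputs and the final RESULT_CHAIN ordering listed above.  */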
3995 static void
3996 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3997 unsigned int length,
3998 gimple stmt,
3999 gimple_stmt_iterator *gsi,
4000 VEC(tree,heap) **result_chain)
4002 tree perm_dest, data_ref, first_vect, second_vect;
4003 gimple perm_stmt;
4004 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4005 int i;
4006 unsigned int j;
4008 gcc_assert (vect_strided_load_supported (vectype, length));
4010 *result_chain = VEC_copy (tree, heap, dr_chain);
4011 for (i = 0; i < exact_log2 (length); i++)
4013 for (j = 0; j < length; j +=2)
4015 first_vect = VEC_index (tree, dr_chain, j);
4016 second_vect = VEC_index (tree, dr_chain, j+1);
4018 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4019 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4020 DECL_GIMPLE_REG_P (perm_dest) = 1;
4021 add_referenced_var (perm_dest);
4023 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
4024 perm_dest, first_vect,
4025 second_vect);
4027 data_ref = make_ssa_name (perm_dest, perm_stmt);
4028 gimple_assign_set_lhs (perm_stmt, data_ref);
4029 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4030 mark_symbols_for_renaming (perm_stmt);
4032 VEC_replace (tree, *result_chain, j/2, data_ref);
4034 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4035 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4036 DECL_GIMPLE_REG_P (perm_dest) = 1;
4037 add_referenced_var (perm_dest);
4039 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
4040 perm_dest, first_vect,
4041 second_vect);
4042 data_ref = make_ssa_name (perm_dest, perm_stmt);
4043 gimple_assign_set_lhs (perm_stmt, data_ref);
4044 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4045 mark_symbols_for_renaming (perm_stmt);
4047 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4049 dr_chain = VEC_copy (tree, heap, *result_chain);
4054 /* Function vect_transform_strided_load.
4056 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4057 to perform their permutation and assign the resulting vectorized statements to
4058 the scalar statements. */
4061 void
4062 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4063 gimple_stmt_iterator *gsi)
4065 VEC(tree,heap) *result_chain = NULL;
4067 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4068 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4069 vectors, that are ready for vector computation. */
4070 result_chain = VEC_alloc (tree, heap, size);
4071 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4072 vect_record_strided_load_vectors (stmt, result_chain);
4073 VEC_free (tree, heap, result_chain);
4076 /* RESULT_CHAIN contains the output of a group of strided loads that were
4077 generated as part of the vectorization of STMT. Assign the statement
4078 for each vector to the associated scalar statement. */
4080 void
4081 vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4083 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4084 gimple next_stmt, new_stmt;
4085 unsigned int i, gap_count;
4086 tree tmp_data_ref;
4088 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4089 Since we scan the chain starting from its first node, their order
4090 corresponds to the order of data-refs in RESULT_CHAIN. */
4091 next_stmt = first_stmt;
4092 gap_count = 1;
4093 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4095 if (!next_stmt)
4096 break;
4098 /* Skip the gaps. Loads created for the gaps will be removed by the dead
4099 code elimination pass later. No need to check for the first stmt in
4100 the group, since it always exists.
4101 GROUP_GAP is the number of steps in elements from the previous
4102 access (if there is no gap GROUP_GAP is 1). We skip loads that
4103 correspond to the gaps. */
4104 if (next_stmt != first_stmt
4105 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4107 gap_count++;
4108 continue;
4111 while (next_stmt)
4113 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4114 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4115 copies, and we put the new vector statement in the first available
4116 RELATED_STMT. */
4117 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4118 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4119 else
4121 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4123 gimple prev_stmt =
4124 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4125 gimple rel_stmt =
4126 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4127 while (rel_stmt)
4129 prev_stmt = rel_stmt;
4130 rel_stmt =
4131 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4134 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4135 new_stmt;
4139 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4140 gap_count = 1;
4141 /* If NEXT_STMT accesses the same DR as the previous statement,
4142 put the same TMP_DATA_REF as its vectorized statement; otherwise
4143 get the next data-ref from RESULT_CHAIN. */
4144 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4145 break;
4150 /* Function vect_can_force_dr_alignment_p.
4152 Returns whether the alignment of a DECL can be forced to an
4153 ALIGNMENT-bit boundary. */
4155 bool
4156 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4158 if (TREE_CODE (decl) != VAR_DECL)
4159 return false;
4161 if (DECL_EXTERNAL (decl))
4162 return false;
4164 if (TREE_ASM_WRITTEN (decl))
4165 return false;
4167 if (TREE_STATIC (decl))
4168 return (alignment <= MAX_OFILE_ALIGNMENT);
4169 else
4170 return (alignment <= MAX_STACK_ALIGNMENT);
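/* A hedged usage sketch: a caller that wants the base object of a data
   reference aligned to the vector type's alignment would typically guard
   the adjustment with this predicate, roughly as follows (hypothetical
   excerpt; BASE and VECTYPE are assumed to have been set up by the
   caller):

     if (vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
       {
         DECL_ALIGN (base) = TYPE_ALIGN (vectype);
         DECL_USER_ALIGN (base) = 1;
       }
  */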
4174 /* Return whether the data reference DR is supported with respect to its
4175 alignment.
4176 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even if
4177 it is aligned, i.e., check if it is possible to vectorize it with different
4178 alignment. */
4180 enum dr_alignment_support
4181 vect_supportable_dr_alignment (struct data_reference *dr,
4182 bool check_aligned_accesses)
4184 gimple stmt = DR_STMT (dr);
4185 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4186 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4187 enum machine_mode mode = TYPE_MODE (vectype);
4188 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4189 struct loop *vect_loop = NULL;
4190 bool nested_in_vect_loop = false;
4192 if (aligned_access_p (dr) && !check_aligned_accesses)
4193 return dr_aligned;
4195 if (loop_vinfo)
4197 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4198 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4201 /* Possibly unaligned access. */
4203 /* We can choose between using the implicit realignment scheme (generating
4204 a misaligned_move stmt) and the explicit realignment scheme (generating
4205 aligned loads with a REALIGN_LOAD). There are two variants to the
4206 explicit realignment scheme: optimized, and unoptimized.
4207 We can optimize the realignment only if the step between consecutive
4208 vector loads is equal to the vector size. Since the vector memory
4209 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4210 is guaranteed that the misalignment amount remains the same throughout the
4211 execution of the vectorized loop. Therefore, we can create the
4212 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4213 at the loop preheader.
4215 However, in the case of outer-loop vectorization, when vectorizing a
4216 memory access in the inner-loop nested within the LOOP that is now being
4217 vectorized, while it is guaranteed that the misalignment of the
4218 vectorized memory access will remain the same in different outer-loop
4219 iterations, it is *not* guaranteed that it will remain the same throughout
4220 the execution of the inner-loop. This is because the inner-loop advances
4221 with the original scalar step (and not in steps of VS). If the inner-loop
4222 step happens to be a multiple of VS, then the misalignment remains fixed
4223 and we can use the optimized realignment scheme. For example:
4225 for (i=0; i<N; i++)
4226 for (j=0; j<M; j++)
4227 s += a[i+j];
4229 When vectorizing the i-loop in the above example, the step between
4230 consecutive vector loads is 1, and so the misalignment does not remain
4231 fixed across the execution of the inner-loop, and the realignment cannot
4232 be optimized (as illustrated in the following pseudo vectorized loop):
4234 for (i=0; i<N; i+=4)
4235 for (j=0; j<M; j++){
4236 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4237 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4238 // (assuming that we start from an aligned address).
4241 We therefore have to use the unoptimized realignment scheme:
4243 for (i=0; i<N; i+=4)
4244 for (j=k; j<M; j+=4)
4245 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4246 // that the misalignment of the initial address is
4247 // 0).
4249 The loop can then be vectorized as follows:
4251 for (k=0; k<4; k++){
4252 rt = get_realignment_token (&vp[k]);
4253 for (i=0; i<N; i+=4){
4254 v1 = vp[i+k];
4255 for (j=k; j<M; j+=4){
4256 v2 = vp[i+j+VS-1];
4257 va = REALIGN_LOAD <v1,v2,rt>;
4258 vs += va;
4259 v1 = v2;
4262 } */
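/* A small stand-alone illustration (hypothetical helper, not part of the
   vectorizer) of the point above: the misalignment seen by the inner loop
   stays fixed only when the inner-loop step, in bytes, is a multiple of
   the vector size; for the nested case the code below only picks the
   optimized scheme when DR_STEP equals the vector size exactly.

     #include <stdio.h>

     #define VS 16   // vector size in bytes

     // Print the misalignment seen on successive inner-loop iterations
     // for a given start offset and step (both in bytes).
     static void
     misalignment_trace (unsigned start, unsigned step, unsigned iters)
     {
       unsigned j;
       for (j = 0; j < iters; j++)
         printf ("%u ", (start + j * step) % VS);
       printf ("\n");
     }

   misalignment_trace (0, 4, 8) prints  0 4 8 12 0 4 8 12   (varies),
   misalignment_trace (4, 16, 8) prints 4 4 4 4 4 4 4 4     (fixed).  */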
4264 if (DR_IS_READ (dr))
4266 bool is_packed = false;
4267 tree type = (TREE_TYPE (DR_REF (dr)));
4269 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4270 && (!targetm.vectorize.builtin_mask_for_load
4271 || targetm.vectorize.builtin_mask_for_load ()))
4273 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4274 if ((nested_in_vect_loop
4275 && (TREE_INT_CST_LOW (DR_STEP (dr))
4276 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4277 || !loop_vinfo)
4278 return dr_explicit_realign;
4279 else
4280 return dr_explicit_realign_optimized;
4282 if (!known_alignment_for_access_p (dr))
4284 tree ba = DR_BASE_OBJECT (dr);
4286 if (ba)
4287 is_packed = contains_packed_reference (ba);
4290 if (targetm.vectorize.
4291 support_vector_misalignment (mode, type,
4292 DR_MISALIGNMENT (dr), is_packed))
4293 /* Can't software pipeline the loads, but can at least do them. */
4294 return dr_unaligned_supported;
4296 else
4298 bool is_packed = false;
4299 tree type = (TREE_TYPE (DR_REF (dr)));
4301 if (!known_alignment_for_access_p (dr))
4303 tree ba = DR_BASE_OBJECT (dr);
4305 if (ba)
4306 is_packed = contains_packed_reference (ba);
4309 if (targetm.vectorize.
4310 support_vector_misalignment (mode, type,
4311 DR_MISALIGNMENT (dr), is_packed))
4312 return dr_unaligned_supported;
4315 /* Unsupported. */
4316 return dr_unaligned_unsupported;