gcc/tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
43 #include "expr.h"
44 #include "optabs.h"
46 /* Return the smallest scalar part of STMT.
47 This is used to determine the vectype of the stmt. We generally set the
48 vectype according to the type of the result (lhs). For stmts whose
49 result-type is different than the type of the arguments (e.g., demotion,
50 promotion), vectype will be reset appropriately (later). Note that we have
51 to visit the smallest datatype in this function, because that determines the
52 VF. If the smallest datatype in the loop is present only as the rhs of a
53 promotion operation - we'd miss it.
54 Such a case, where a variable of this datatype does not appear in the lhs
55 anywhere in the loop, can only occur if it's an invariant: e.g.:
56 'int_x = (int) short_inv', which we'd expect to have been optimized away by
57 invariant motion. However, we cannot rely on invariant motion to always
58 take invariants out of the loop, and so in the case of promotion we also
59 have to check the rhs.
60 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
61 types. */
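/* For illustration (hypothetical stmt): in a loop containing
     int_y = (int) short_x;
   the result type is int (4 bytes) while the rhs operand type is short
   (2 bytes), so this function returns the short type with
   *LHS_SIZE_UNIT == 4 and *RHS_SIZE_UNIT == 2, and the VF is then
   derived from the smaller 2-byte type.  */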
63 tree
64 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
65 HOST_WIDE_INT *rhs_size_unit)
67 tree scalar_type = gimple_expr_type (stmt);
68 HOST_WIDE_INT lhs, rhs;
70 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
72 if (is_gimple_assign (stmt)
73 && (gimple_assign_cast_p (stmt)
74 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
75 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
77 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
79 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
80 if (rhs < lhs)
81 scalar_type = rhs_type;
84 *lhs_size_unit = lhs;
85 *rhs_size_unit = rhs;
86 return scalar_type;
90 /* Find the place of the data-ref in STMT in the interleaving chain that starts
91 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
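/* For illustration (hypothetical chain): if FIRST_STMT accesses a[4*i]
   and the chain continues with a[4*i+1], a[4*i+2], a[4*i+3], then the
   stmt accessing a[4*i+2] is at place 2 (places are counted from 0).  */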
93 int
94 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
96 gimple next_stmt = first_stmt;
97 int result = 0;
99 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
100 return -1;
102 while (next_stmt && next_stmt != stmt)
104 result++;
105 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
108 if (next_stmt)
109 return result;
110 else
111 return -1;
115 /* Function vect_insert_into_interleaving_chain.
117 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
119 static void
120 vect_insert_into_interleaving_chain (struct data_reference *dra,
121 struct data_reference *drb)
123 gimple prev, next;
124 tree next_init;
125 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
126 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
128 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
129 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
130 while (next)
132 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
133 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
135 /* Insert here. */
136 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
137 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
138 return;
140 prev = next;
141 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
144 /* We got to the end of the list. Insert here. */
145 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
146 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
150 /* Function vect_update_interleaving_chain.
152 For two data-refs DRA and DRB that are part of a chain of interleaved data
153 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
155 There are four possible cases:
156 1. New stmts - both DRA and DRB are not a part of any chain:
157 FIRST_DR = DRB
158 NEXT_DR (DRB) = DRA
159 2. DRB is a part of a chain and DRA is not:
160 no need to update FIRST_DR
161 no need to insert DRB
162 insert DRA according to init
163 3. DRA is a part of a chain and DRB is not:
164 if (init of FIRST_DR > init of DRB)
165 FIRST_DR = DRB
166 NEXT(FIRST_DR) = previous FIRST_DR
167 else
168 insert DRB according to its init
169 4. both DRA and DRB are in some interleaving chains:
170 choose the chain with the smallest init of FIRST_DR
171 insert the nodes of the second chain into the first one. */
173 static void
174 vect_update_interleaving_chain (struct data_reference *drb,
175 struct data_reference *dra)
177 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
178 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
179 tree next_init, init_dra_chain, init_drb_chain;
180 gimple first_a, first_b;
181 tree node_init;
182 gimple node, prev, next, first_stmt;
184 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
185 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
187 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
188 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
189 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
190 return;
193 /* 2. DRB is a part of a chain and DRA is not. */
194 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
196 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
197 /* Insert DRA into the chain of DRB. */
198 vect_insert_into_interleaving_chain (dra, drb);
199 return;
202 /* 3. DRA is a part of a chain and DRB is not. */
203 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
205 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
206 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
207 old_first_stmt)));
208 gimple tmp;
210 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
212 /* DRB's init is smaller than the init of the stmt previously marked
213 as the first stmt of the interleaving chain of DRA. Therefore, we
214 update FIRST_STMT and put DRB in the head of the list. */
215 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
216 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
218 /* Update all the stmts in the list to point to the new FIRST_STMT. */
219 tmp = old_first_stmt;
220 while (tmp)
222 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
223 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
226 else
228 /* Insert DRB in the list of DRA. */
229 vect_insert_into_interleaving_chain (drb, dra);
230 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
232 return;
235 /* 4. both DRA and DRB are in some interleaving chains. */
236 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
237 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
238 if (first_a == first_b)
239 return;
240 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
241 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
243 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
245 /* Insert the nodes of DRA chain into the DRB chain.
246 After inserting a node, continue from this node of the DRB chain (don't
247 start from the beginning).  */
248 node = DR_GROUP_FIRST_DR (stmtinfo_a);
249 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
250 first_stmt = first_b;
252 else
254 /* Insert the nodes of DRB chain into the DRA chain.
255 After inserting a node, continue from this node of the DRA chain (don't
256 start from the beginning).  */
257 node = DR_GROUP_FIRST_DR (stmtinfo_b);
258 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
259 first_stmt = first_a;
262 while (node)
264 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
265 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
266 while (next)
268 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
269 if (tree_int_cst_compare (next_init, node_init) > 0)
271 /* Insert here. */
272 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
273 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
274 prev = node;
275 break;
277 prev = next;
278 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
280 if (!next)
282 /* We got to the end of the list. Insert here. */
283 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
284 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
285 prev = node;
287 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
288 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
292 /* Check dependence between DRA and DRB for basic block vectorization.
293 If the accesses share the same bases and offsets, we can compare their initial
294 constant offsets to decide whether they differ or not. In case of a read-
295 write dependence we check that the load is before the store to ensure that
296 vectorization will not change the order of the accesses. */
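/* For illustration (hypothetical basic block): for
     x = a[0];  a[0] = y;
   both refs have the same base, offset and init, so the accesses overlap;
   the earlier stmt is the load, hence the order is preserved by
   vectorization and we return false.  If the store came first we would
   return true and the block would not be vectorized.  */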
298 static bool
299 vect_drs_dependent_in_basic_block (struct data_reference *dra,
300 struct data_reference *drb)
302 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
303 gimple earlier_stmt;
305 /* We only call this function for pairs of loads and stores, but we verify
306 it here. */
307 if (DR_IS_READ (dra) == DR_IS_READ (drb))
309 if (DR_IS_READ (dra))
310 return false;
311 else
312 return true;
315 /* Check that the data-refs have the same bases and offsets. If not, we can't
316 determine whether they are dependent. */
317 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
318 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
319 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
320 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
321 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
322 || !dr_equal_offsets_p (dra, drb))
323 return true;
325 /* Check the types. */
326 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
327 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
329 if (type_size_a != type_size_b
330 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
331 TREE_TYPE (DR_REF (drb))))
332 return true;
334 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
335 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
337 /* Two different locations - no dependence. */
338 if (init_a != init_b)
339 return false;
341 /* We have a read-write dependence. Check that the load is before the store.
342 When we vectorize basic blocks, a vector load can only appear before the
343 corresponding scalar load, and a vector store can only appear after its
344 corresponding scalar store. So the order of the accesses is preserved in
345 case the load is before the store. */
346 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
347 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
348 return false;
350 return true;
354 /* Function vect_check_interleaving.
356 Check if DRA and DRB are part of an interleaving. If they are, insert
357 DRA and DRB into an interleaving chain. */
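/* For illustration (hypothetical accesses): a[2*i] and a[2*i+1] with
   4-byte elements both have an 8-byte step; their inits are 0 and 4,
   the difference 4 does not exceed the step and is a multiple of the
   element size, so the two refs are recorded as one interleaving chain
   with DRA accessed before DRB.  */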
359 static bool
360 vect_check_interleaving (struct data_reference *dra,
361 struct data_reference *drb)
363 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
365 /* Check that the data-refs have the same first location (except init) and
366 that they are both either stores or loads (not a load and a store). */
367 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
368 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
369 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
370 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
371 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
372 || !dr_equal_offsets_p (dra, drb)
373 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
374 || DR_IS_READ (dra) != DR_IS_READ (drb))
375 return false;
377 /* Check:
378 1. data-refs are of the same type
379 2. their steps are equal
380 3. the step (if greater than zero) is greater than the difference between
381 data-refs' inits. */
382 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
383 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
385 if (type_size_a != type_size_b
386 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
387 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
388 TREE_TYPE (DR_REF (drb))))
389 return false;
391 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
392 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
393 step = TREE_INT_CST_LOW (DR_STEP (dra));
395 if (init_a > init_b)
397 /* If init_a == init_b + the size of the type * k, we have an interleaving,
398 and DRB is accessed before DRA. */
399 diff_mod_size = (init_a - init_b) % type_size_a;
401 if (step && (init_a - init_b) > step)
402 return false;
404 if (diff_mod_size == 0)
406 vect_update_interleaving_chain (drb, dra);
407 if (vect_print_dump_info (REPORT_DR_DETAILS))
409 fprintf (vect_dump, "Detected interleaving ");
410 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
411 fprintf (vect_dump, " and ");
412 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
414 return true;
417 else
419 /* If init_b == init_a + the size of the type * k, we have an
420 interleaving, and DRA is accessed before DRB. */
421 diff_mod_size = (init_b - init_a) % type_size_a;
423 if (step && (init_b - init_a) > step)
424 return false;
426 if (diff_mod_size == 0)
428 vect_update_interleaving_chain (dra, drb);
429 if (vect_print_dump_info (REPORT_DR_DETAILS))
431 fprintf (vect_dump, "Detected interleaving ");
432 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
433 fprintf (vect_dump, " and ");
434 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
436 return true;
440 return false;
443 /* Check if the data references pointed to by DR_I and DR_J are the same or
444 belong to the same interleaving group. Return FALSE if the drs are
445 different, otherwise return TRUE. */
447 static bool
448 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
450 gimple stmt_i = DR_STMT (dr_i);
451 gimple stmt_j = DR_STMT (dr_j);
453 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
454 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
455 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
456 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
457 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
458 return true;
459 else
460 return false;
463 /* If address ranges represented by DDR_I and DDR_J are equal,
464 return TRUE, otherwise return FALSE. */
466 static bool
467 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
469 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
470 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
471 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
472 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
473 return true;
474 else
475 return false;
478 /* Insert DDR into the LOOP_VINFO list of ddrs that may alias and need to be
479 tested at run-time. Return TRUE if DDR was successfully inserted.
480 Return FALSE if versioning is not supported. */
482 static bool
483 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
485 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
487 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
488 return false;
490 if (vect_print_dump_info (REPORT_DR_DETAILS))
492 fprintf (vect_dump, "mark for run-time aliasing test between ");
493 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
494 fprintf (vect_dump, " and ");
495 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
498 if (optimize_loop_nest_for_size_p (loop))
500 if (vect_print_dump_info (REPORT_DR_DETAILS))
501 fprintf (vect_dump, "versioning not supported when optimizing for size.");
502 return false;
505 /* FORNOW: We don't support versioning with outer-loop vectorization. */
506 if (loop->inner)
508 if (vect_print_dump_info (REPORT_DR_DETAILS))
509 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
510 return false;
513 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
514 return true;
518 /* Function vect_analyze_data_ref_dependence.
520 Return TRUE if there exists (or might exist) a dependence between a
521 memory-reference DRA and a memory-reference DRB. When versioning for alias
522 can handle the dependence with a run-time check, return FALSE. Adjust
523 *MAX_VF according to the data dependence. */
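/* For illustration (hypothetical loop accesses): a dependence between
   a[i] and a[i+100] has distance 100, which is >= any usable VF, so it
   does not prevent vectorization; a distance of 2 instead lowers
   *MAX_VF to 2.  */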
525 static bool
526 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
527 loop_vec_info loop_vinfo, int *max_vf,
528 bool *data_dependence_in_bb)
530 unsigned int i;
531 struct loop *loop = NULL;
532 struct data_reference *dra = DDR_A (ddr);
533 struct data_reference *drb = DDR_B (ddr);
534 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
535 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
536 lambda_vector dist_v;
537 unsigned int loop_depth;
539 /* Don't bother to analyze statements marked as unvectorizable. */
540 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
541 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
542 return false;
544 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
546 /* Independent data accesses. */
547 vect_check_interleaving (dra, drb);
548 return false;
551 if (loop_vinfo)
552 loop = LOOP_VINFO_LOOP (loop_vinfo);
554 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
555 return false;
557 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
559 if (loop_vinfo)
561 if (vect_print_dump_info (REPORT_DR_DETAILS))
563 fprintf (vect_dump, "versioning for alias required: "
564 "can't determine dependence between ");
565 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
566 fprintf (vect_dump, " and ");
567 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
570 /* Add to list of ddrs that need to be tested at run-time. */
571 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
574 /* When vectorizing a basic block, an unknown dependence can still mean
575 a strided access. */
576 if (vect_check_interleaving (dra, drb))
577 return false;
579 if (vect_print_dump_info (REPORT_DR_DETAILS))
581 fprintf (vect_dump, "can't determine dependence between ");
582 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
583 fprintf (vect_dump, " and ");
584 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
587 /* We do not vectorize basic blocks with write-write dependencies. */
588 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
589 return true;
591 /* We deal with read-write dependencies in basic blocks later (by
592 verifying that all the loads in the basic block are before all the
593 stores). */
594 *data_dependence_in_bb = true;
595 return false;
598 /* Versioning for alias is not yet supported for basic block SLP, and
599 dependence distance is not applicable; hence, in case of a known data
600 dependence, basic block vectorization is impossible for now. */
601 if (!loop_vinfo)
603 if (dra != drb && vect_check_interleaving (dra, drb))
604 return false;
606 if (vect_print_dump_info (REPORT_DR_DETAILS))
608 fprintf (vect_dump, "determined dependence between ");
609 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
610 fprintf (vect_dump, " and ");
611 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
614 /* Do not vectorize basic blocks with write-write dependences. */
615 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
616 return true;
618 /* Check if this dependence is allowed in basic block vectorization. */
619 return vect_drs_dependent_in_basic_block (dra, drb);
622 /* Loop-based vectorization and known data dependence. */
623 if (DDR_NUM_DIST_VECTS (ddr) == 0)
625 if (vect_print_dump_info (REPORT_DR_DETAILS))
627 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
628 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
629 fprintf (vect_dump, " and ");
630 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
632 /* Add to list of ddrs that need to be tested at run-time. */
633 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
636 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
637 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
639 int dist = dist_v[loop_depth];
641 if (vect_print_dump_info (REPORT_DR_DETAILS))
642 fprintf (vect_dump, "dependence distance = %d.", dist);
644 if (dist == 0)
646 if (vect_print_dump_info (REPORT_DR_DETAILS))
648 fprintf (vect_dump, "dependence distance == 0 between ");
649 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
650 fprintf (vect_dump, " and ");
651 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
654 /* For interleaving, mark that there is a read-write dependency if
655 necessary. We have already checked that one of the data-refs is a store. */
656 if (DR_IS_READ (dra))
657 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
658 else
660 if (DR_IS_READ (drb))
661 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
664 continue;
667 if (dist > 0 && DDR_REVERSED_P (ddr))
669 /* If DDR_REVERSED_P the order of the data-refs in DDR was
670 reversed (to make distance vector positive), and the actual
671 distance is negative. */
672 if (vect_print_dump_info (REPORT_DR_DETAILS))
673 fprintf (vect_dump, "dependence distance negative.");
674 continue;
677 if (abs (dist) >= 2
678 && abs (dist) < *max_vf)
680 /* The dependence distance requires reduction of the maximal
681 vectorization factor. */
682 *max_vf = abs (dist);
683 if (vect_print_dump_info (REPORT_DR_DETAILS))
684 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
685 *max_vf);
688 if (abs (dist) >= *max_vf)
690 /* Dependence distance does not create dependence, as far as
691 vectorization is concerned, in this case. */
692 if (vect_print_dump_info (REPORT_DR_DETAILS))
693 fprintf (vect_dump, "dependence distance >= VF.");
694 continue;
697 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
699 fprintf (vect_dump, "not vectorized, possible dependence "
700 "between data-refs ");
701 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
702 fprintf (vect_dump, " and ");
703 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
706 return true;
709 return false;
712 /* Function vect_analyze_data_ref_dependences.
714 Examine all the data references in the loop, and make sure there do not
715 exist any data dependences between them. Set *MAX_VF according to
716 the maximum vectorization factor the data dependences allow. */
718 bool
719 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
720 bb_vec_info bb_vinfo, int *max_vf,
721 bool *data_dependence_in_bb)
723 unsigned int i;
724 VEC (ddr_p, heap) *ddrs = NULL;
725 struct data_dependence_relation *ddr;
727 if (vect_print_dump_info (REPORT_DETAILS))
728 fprintf (vect_dump, "=== vect_analyze_dependences ===");
730 if (loop_vinfo)
731 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
732 else
733 ddrs = BB_VINFO_DDRS (bb_vinfo);
735 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
736 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
737 data_dependence_in_bb))
738 return false;
740 return true;
744 /* Function vect_compute_data_ref_alignment
746 Compute the misalignment of the data reference DR.
748 Output:
749 1. If during the misalignment computation it is found that the data reference
750 cannot be vectorized then false is returned.
751 2. DR_MISALIGNMENT (DR) is defined.
753 FOR NOW: No analysis is actually performed. Misalignment is calculated
754 only for trivial cases. TODO. */
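/* For illustration (hypothetical ref): for an access to a[1] in a local
   'int a[N]' with a 16-byte V4SI vectype, DR_INIT is 4, the base decl
   can be forced to 16-byte alignment, and DR_MISALIGNMENT is set to
   4 % 16 == 4 bytes.  */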
756 static bool
757 vect_compute_data_ref_alignment (struct data_reference *dr)
759 gimple stmt = DR_STMT (dr);
760 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
761 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
762 struct loop *loop = NULL;
763 tree ref = DR_REF (dr);
764 tree vectype;
765 tree base, base_addr;
766 bool base_aligned;
767 tree misalign;
768 tree aligned_to, alignment;
770 if (vect_print_dump_info (REPORT_DETAILS))
771 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
773 if (loop_vinfo)
774 loop = LOOP_VINFO_LOOP (loop_vinfo);
776 /* Initialize misalignment to unknown. */
777 SET_DR_MISALIGNMENT (dr, -1);
779 misalign = DR_INIT (dr);
780 aligned_to = DR_ALIGNED_TO (dr);
781 base_addr = DR_BASE_ADDRESS (dr);
782 vectype = STMT_VINFO_VECTYPE (stmt_info);
784 /* In case the dataref is in an inner-loop of the loop that is being
785 vectorized (LOOP), we use the base and misalignment information
786 relative to the outer-loop (LOOP). This is ok only if the misalignment
787 stays the same throughout the execution of the inner-loop, which is why
788 we have to check that the stride of the dataref in the inner-loop is a
789 multiple of the vector size.
790 if (loop && nested_in_vect_loop_p (loop, stmt))
792 tree step = DR_STEP (dr);
793 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
795 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
797 if (vect_print_dump_info (REPORT_ALIGNMENT))
798 fprintf (vect_dump, "inner step divides the vector-size.");
799 misalign = STMT_VINFO_DR_INIT (stmt_info);
800 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
801 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
803 else
805 if (vect_print_dump_info (REPORT_ALIGNMENT))
806 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
807 misalign = NULL_TREE;
811 base = build_fold_indirect_ref (base_addr);
812 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
814 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
815 || !misalign)
817 if (vect_print_dump_info (REPORT_ALIGNMENT))
819 fprintf (vect_dump, "Unknown alignment for access: ");
820 print_generic_expr (vect_dump, base, TDF_SLIM);
822 return true;
825 if ((DECL_P (base)
826 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
827 alignment) >= 0)
828 || (TREE_CODE (base_addr) == SSA_NAME
829 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
830 TREE_TYPE (base_addr)))),
831 alignment) >= 0))
832 base_aligned = true;
833 else
834 base_aligned = false;
836 if (!base_aligned)
838 /* Do not change the alignment of global variables if
839 flag_section_anchors is enabled. */
840 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
841 || (TREE_STATIC (base) && flag_section_anchors))
843 if (vect_print_dump_info (REPORT_DETAILS))
845 fprintf (vect_dump, "can't force alignment of ref: ");
846 print_generic_expr (vect_dump, ref, TDF_SLIM);
848 return true;
851 /* Force the alignment of the decl.
852 NOTE: This is the only change to the code we make during
853 the analysis phase, before deciding to vectorize the loop. */
854 if (vect_print_dump_info (REPORT_DETAILS))
856 fprintf (vect_dump, "force alignment of ");
857 print_generic_expr (vect_dump, ref, TDF_SLIM);
860 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
861 DECL_USER_ALIGN (base) = 1;
864 /* At this point we assume that the base is aligned. */
865 gcc_assert (base_aligned
866 || (TREE_CODE (base) == VAR_DECL
867 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
869 /* If this is a backward running DR then the first access in the larger
870 vectype is actually N-1 elements before the address in the DR.
871 Adjust misalign accordingly. */
872 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
874 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
875 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
876 otherwise we wouldn't be here. */
877 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
878 /* PLUS because DR_STEP was negative. */
879 misalign = size_binop (PLUS_EXPR, misalign, offset);
882 /* Modulo alignment. */
883 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
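/* Illustrative numbers (hypothetical V4SI access): with DR_STEP -4 and
   DR_INIT 0, OFFSET is 3 * -4 = -12, MISALIGN becomes -12, and the
   FLOOR_MOD by the 16-byte alignment above yields a misalignment of
   4 bytes.  */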
885 if (!host_integerp (misalign, 1))
887 /* Negative or overflowed misalignment value. */
888 if (vect_print_dump_info (REPORT_DETAILS))
889 fprintf (vect_dump, "unexpected misalign value");
890 return false;
893 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
895 if (vect_print_dump_info (REPORT_DETAILS))
897 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
898 print_generic_expr (vect_dump, ref, TDF_SLIM);
901 return true;
905 /* Function vect_compute_data_refs_alignment
907 Compute the misalignment of data references in the loop.
908 Return FALSE if a data reference is found that cannot be vectorized. */
910 static bool
911 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
912 bb_vec_info bb_vinfo)
914 VEC (data_reference_p, heap) *datarefs;
915 struct data_reference *dr;
916 unsigned int i;
918 if (loop_vinfo)
919 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
920 else
921 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
923 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
924 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
925 && !vect_compute_data_ref_alignment (dr))
927 if (bb_vinfo)
929 /* Mark unsupported statement as unvectorizable. */
930 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
931 continue;
933 else
934 return false;
937 return true;
941 /* Function vect_update_misalignment_for_peel
943 DR - the data reference whose misalignment is to be adjusted.
944 DR_PEEL - the data reference whose misalignment is being made
945 zero in the vector loop by the peel.
946 NPEEL - the number of iterations in the peel loop if the misalignment
947 of DR_PEEL is known at compile time. */
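/* For illustration (hypothetical refs): if DR_PEEL needs NPEEL == 2
   peeled iterations and DR is a 4-byte access with known misalignment 8
   and a positive step against a 16-byte vector type, the updated value
   is (8 + 2 * 4) & 15 == 0, i.e. DR becomes aligned in the vector loop.  */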
949 static void
950 vect_update_misalignment_for_peel (struct data_reference *dr,
951 struct data_reference *dr_peel, int npeel)
953 unsigned int i;
954 VEC(dr_p,heap) *same_align_drs;
955 struct data_reference *current_dr;
956 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
957 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
958 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
959 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
961 /* For interleaved data accesses the step in the loop must be multiplied by
962 the size of the interleaving group. */
963 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
964 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
965 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
966 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
968 /* It can be assumed that the data refs with the same alignment as dr_peel
969 are aligned in the vector loop. */
970 same_align_drs
971 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
972 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
974 if (current_dr != dr)
975 continue;
976 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
977 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
978 SET_DR_MISALIGNMENT (dr, 0);
979 return;
982 if (known_alignment_for_access_p (dr)
983 && known_alignment_for_access_p (dr_peel))
985 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
986 int misal = DR_MISALIGNMENT (dr);
987 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
988 misal += negative ? -npeel * dr_size : npeel * dr_size;
989 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
990 SET_DR_MISALIGNMENT (dr, misal);
991 return;
994 if (vect_print_dump_info (REPORT_DETAILS))
995 fprintf (vect_dump, "Setting misalignment to -1.");
996 SET_DR_MISALIGNMENT (dr, -1);
1000 /* Function vect_verify_datarefs_alignment
1002 Return TRUE if all data references in the loop can be
1003 handled with respect to alignment. */
1005 bool
1006 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1008 VEC (data_reference_p, heap) *datarefs;
1009 struct data_reference *dr;
1010 enum dr_alignment_support supportable_dr_alignment;
1011 unsigned int i;
1013 if (loop_vinfo)
1014 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1015 else
1016 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1018 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1020 gimple stmt = DR_STMT (dr);
1021 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1023 /* For interleaving, only the alignment of the first access matters.
1024 Skip statements marked as not vectorizable. */
1025 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1026 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1027 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1028 continue;
1030 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1031 if (!supportable_dr_alignment)
1033 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1035 if (DR_IS_READ (dr))
1036 fprintf (vect_dump,
1037 "not vectorized: unsupported unaligned load.");
1038 else
1039 fprintf (vect_dump,
1040 "not vectorized: unsupported unaligned store.");
1042 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1044 return false;
1046 if (supportable_dr_alignment != dr_aligned
1047 && vect_print_dump_info (REPORT_ALIGNMENT))
1048 fprintf (vect_dump, "Vectorizing an unaligned access.");
1050 return true;
1054 /* Function vector_alignment_reachable_p
1056 Return true if vector alignment for DR is reachable by peeling
1057 a few loop iterations. Return false otherwise. */
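/* For illustration (hypothetical ref): a non-strided int access with a
   known misalignment of 8 bytes against a V4SI vector type passes the
   check (8 % 4 == 0) and the function returns true; a misalignment of
   6 bytes is not reachable by peeling whole iterations and returns
   false.  */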
1059 static bool
1060 vector_alignment_reachable_p (struct data_reference *dr)
1062 gimple stmt = DR_STMT (dr);
1063 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1064 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1066 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1068 /* For interleaved accesses we peel only if the number of iterations in
1069 the prolog loop ({VF - misalignment}) is a multiple of the
1070 number of interleaved accesses. */
1071 int elem_size, mis_in_elements;
1072 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1074 /* FORNOW: handle only known alignment. */
1075 if (!known_alignment_for_access_p (dr))
1076 return false;
1078 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1079 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1081 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1082 return false;
1085 /* If the misalignment is known at compile time then allow peeling
1086 only if natural alignment is reachable through peeling. */
1087 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1089 HOST_WIDE_INT elmsize =
1090 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1091 if (vect_print_dump_info (REPORT_DETAILS))
1093 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1094 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1096 if (DR_MISALIGNMENT (dr) % elmsize)
1098 if (vect_print_dump_info (REPORT_DETAILS))
1099 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1100 return false;
1104 if (!known_alignment_for_access_p (dr))
1106 tree type = (TREE_TYPE (DR_REF (dr)));
1107 tree ba = DR_BASE_OBJECT (dr);
1108 bool is_packed = false;
1110 if (ba)
1111 is_packed = contains_packed_reference (ba);
1113 if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1114 is_packed = true;
1116 if (vect_print_dump_info (REPORT_DETAILS))
1117 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1118 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1119 return true;
1120 else
1121 return false;
1124 return true;
1128 /* Calculate the cost of the memory access represented by DR. */
1130 static void
1131 vect_get_data_access_cost (struct data_reference *dr,
1132 unsigned int *inside_cost,
1133 unsigned int *outside_cost)
1135 gimple stmt = DR_STMT (dr);
1136 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1137 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1138 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1139 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1140 int ncopies = vf / nunits;
1141 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1143 if (!supportable_dr_alignment)
1144 *inside_cost = VECT_MAX_COST;
1145 else
1147 if (DR_IS_READ (dr))
1148 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1149 else
1150 vect_get_store_cost (dr, ncopies, inside_cost);
1153 if (vect_print_dump_info (REPORT_COST))
1154 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1155 "outside_cost = %d.", *inside_cost, *outside_cost);
1159 static hashval_t
1160 vect_peeling_hash (const void *elem)
1162 const struct _vect_peel_info *peel_info;
1164 peel_info = (const struct _vect_peel_info *) elem;
1165 return (hashval_t) peel_info->npeel;
1169 static int
1170 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1172 const struct _vect_peel_info *a, *b;
1174 a = (const struct _vect_peel_info *) elem1;
1175 b = (const struct _vect_peel_info *) elem2;
1176 return (a->npeel == b->npeel);
1180 /* Insert DR into peeling hash table with NPEEL as key. */
1182 static void
1183 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1184 int npeel)
1186 struct _vect_peel_info elem, *slot;
1187 void **new_slot;
1188 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1190 elem.npeel = npeel;
1191 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1192 &elem);
1193 if (slot)
1194 slot->count++;
1195 else
1197 slot = XNEW (struct _vect_peel_info);
1198 slot->npeel = npeel;
1199 slot->dr = dr;
1200 slot->count = 1;
1201 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1202 INSERT);
1203 *new_slot = slot;
1206 if (!supportable_dr_alignment && !flag_vect_cost_model)
1207 slot->count += VECT_MAX_COST;
1211 /* Traverse the peeling hash table to find the peeling option that aligns
1212 the maximum number of data accesses. */
1214 static int
1215 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1217 vect_peel_info elem = (vect_peel_info) *slot;
1218 vect_peel_extended_info max = (vect_peel_extended_info) data;
1220 if (elem->count > max->peel_info.count)
1222 max->peel_info.npeel = elem->npeel;
1223 max->peel_info.count = elem->count;
1224 max->peel_info.dr = elem->dr;
1227 return 1;
1231 /* Traverse peeling hash table and calculate cost for each peeling option.
1232 Find the one with the lowest cost. */
1234 static int
1235 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1237 vect_peel_info elem = (vect_peel_info) *slot;
1238 vect_peel_extended_info min = (vect_peel_extended_info) data;
1239 int save_misalignment, dummy;
1240 unsigned int inside_cost = 0, outside_cost = 0, i;
1241 gimple stmt = DR_STMT (elem->dr);
1242 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1243 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1244 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1245 struct data_reference *dr;
1247 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1249 stmt = DR_STMT (dr);
1250 stmt_info = vinfo_for_stmt (stmt);
1251 /* For interleaving, only the alignment of the first access
1252 matters. */
1253 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1254 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1255 continue;
1257 save_misalignment = DR_MISALIGNMENT (dr);
1258 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1259 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1260 SET_DR_MISALIGNMENT (dr, save_misalignment);
1263 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1264 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1266 if (inside_cost < min->inside_cost
1267 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1269 min->inside_cost = inside_cost;
1270 min->outside_cost = outside_cost;
1271 min->peel_info.dr = elem->dr;
1272 min->peel_info.npeel = elem->npeel;
1275 return 1;
1279 /* Choose best peeling option by traversing peeling hash table and either
1280 choosing an option with the lowest cost (if cost model is enabled) or the
1281 option that aligns as many accesses as possible. */
1283 static struct data_reference *
1284 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1285 unsigned int *npeel)
1287 struct _vect_peel_extended_info res;
1289 res.peel_info.dr = NULL;
1291 if (flag_vect_cost_model)
1293 res.inside_cost = INT_MAX;
1294 res.outside_cost = INT_MAX;
1295 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1296 vect_peeling_hash_get_lowest_cost, &res);
1298 else
1300 res.peel_info.count = 0;
1301 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1302 vect_peeling_hash_get_most_frequent, &res);
1305 *npeel = res.peel_info.npeel;
1306 return res.peel_info.dr;
1310 /* Function vect_enhance_data_refs_alignment
1312 This pass will use loop versioning and loop peeling in order to enhance
1313 the alignment of data references in the loop.
1315 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1316 original loop is to be vectorized. Any other loops that are created by
1317 the transformations performed in this pass - are not supposed to be
1318 vectorized. This restriction will be relaxed.
1320 This pass will require a cost model to guide it whether to apply peeling
1321 or versioning or a combination of the two. For example, the scheme that
1322 Intel uses when given a loop with several memory accesses is as follows:
1323 choose one memory access ('p') whose alignment you want to force by doing
1324 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1325 other accesses are not necessarily aligned, or (2) use loop versioning to
1326 generate one loop in which all accesses are aligned, and another loop in
1327 which only 'p' is necessarily aligned.
1329 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1330 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1331 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1333 Devising a cost model is the most critical aspect of this work. It will
1334 guide us on which access to peel for, whether to use loop versioning, how
1335 many versions to create, etc. The cost model will probably consist of
1336 generic considerations as well as target specific considerations (on
1337 powerpc for example, misaligned stores are more painful than misaligned
1338 loads).
1340 Here are the general steps involved in alignment enhancements:
1342 -- original loop, before alignment analysis:
1343 for (i=0; i<N; i++){
1344 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1345 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1348 -- After vect_compute_data_refs_alignment:
1349 for (i=0; i<N; i++){
1350 x = q[i]; # DR_MISALIGNMENT(q) = 3
1351 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1354 -- Possibility 1: we do loop versioning:
1355 if (p is aligned) {
1356 for (i=0; i<N; i++){ # loop 1A
1357 x = q[i]; # DR_MISALIGNMENT(q) = 3
1358 p[i] = y; # DR_MISALIGNMENT(p) = 0
1361 else {
1362 for (i=0; i<N; i++){ # loop 1B
1363 x = q[i]; # DR_MISALIGNMENT(q) = 3
1364 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1368 -- Possibility 2: we do loop peeling:
1369 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1370 x = q[i];
1371 p[i] = y;
1373 for (i = 3; i < N; i++){ # loop 2A
1374 x = q[i]; # DR_MISALIGNMENT(q) = 0
1375 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1378 -- Possibility 3: combination of loop peeling and versioning:
1379 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1380 x = q[i];
1381 p[i] = y;
1383 if (p is aligned) {
1384 for (i = 3; i<N; i++){ # loop 3A
1385 x = q[i]; # DR_MISALIGNMENT(q) = 0
1386 p[i] = y; # DR_MISALIGNMENT(p) = 0
1389 else {
1390 for (i = 3; i<N; i++){ # loop 3B
1391 x = q[i]; # DR_MISALIGNMENT(q) = 0
1392 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1396 These loops are later passed to loop_transform to be vectorized. The
1397 vectorizer will use the alignment information to guide the transformation
1398 (whether to generate regular loads/stores, or with special handling for
1399 misalignment). */
1401 bool
1402 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1404 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1405 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1406 enum dr_alignment_support supportable_dr_alignment;
1407 struct data_reference *dr0 = NULL, *first_store = NULL;
1408 struct data_reference *dr;
1409 unsigned int i, j;
1410 bool do_peeling = false;
1411 bool do_versioning = false;
1412 bool stat;
1413 gimple stmt;
1414 stmt_vec_info stmt_info;
1415 int vect_versioning_for_alias_required;
1416 unsigned int npeel = 0;
1417 bool all_misalignments_unknown = true;
1418 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1419 unsigned possible_npeel_number = 1;
1420 tree vectype;
1421 unsigned int nelements, mis, same_align_drs_max = 0;
1423 if (vect_print_dump_info (REPORT_DETAILS))
1424 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1426 /* While cost model enhancements are expected in the future, the high level
1427 view of the code at this time is as follows:
1429 A) If there is a misaligned access then see if peeling to align
1430 this access can make all data references satisfy
1431 vect_supportable_dr_alignment. If so, update data structures
1432 as needed and return true.
1434 B) If peeling wasn't possible and there is a data reference with an
1435 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1436 then see if loop versioning checks can be used to make all data
1437 references satisfy vect_supportable_dr_alignment. If so, update
1438 data structures as needed and return true.
1440 C) If neither peeling nor versioning were successful then return false if
1441 any data reference does not satisfy vect_supportable_dr_alignment.
1443 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1445 Note, Possibility 3 above (which is peeling and versioning together) is not
1446 being done at this time. */
1448 /* (1) Peeling to force alignment. */
1450 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1451 Considerations:
1452 + How many accesses will become aligned due to the peeling
1453 - How many accesses will become unaligned due to the peeling,
1454 and the cost of misaligned accesses.
1455 - The cost of peeling (the extra runtime checks, the increase
1456 in code size). */
1458 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1460 stmt = DR_STMT (dr);
1461 stmt_info = vinfo_for_stmt (stmt);
1463 /* For interleaving, only the alignment of the first access
1464 matters. */
1465 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1466 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1467 continue;
1469 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1470 do_peeling = vector_alignment_reachable_p (dr);
1471 if (do_peeling)
1473 if (known_alignment_for_access_p (dr))
1475 unsigned int npeel_tmp;
1476 bool negative = tree_int_cst_compare (DR_STEP (dr),
1477 size_zero_node) < 0;
1479 /* Save info about DR in the hash table. */
1480 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1481 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1482 htab_create (1, vect_peeling_hash,
1483 vect_peeling_hash_eq, free);
1485 vectype = STMT_VINFO_VECTYPE (stmt_info);
1486 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1487 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1488 TREE_TYPE (DR_REF (dr))));
1489 npeel_tmp = (negative
1490 ? (mis - nelements) : (nelements - mis))
1491 & (nelements - 1);
1493 /* For multiple types, it is possible that the bigger type access
1494 will have more than one peeling option. E.g., a loop with two
1495 types: one of size (vector size / 4), and the other one of
1496 size (vector size / 8). The vectorization factor will be 8. If both
1497 accesses are misaligned by 3, the first one needs one scalar
1498 iteration to be aligned, and the second one needs 5. But the
1499 first one will also be aligned by peeling 5 scalar
1500 iterations, and in that case both accesses will be aligned.
1501 Hence, except for the immediate peeling amount, we also want
1502 to try to add a full vector size, as long as we don't exceed the
1503 vectorization factor.
1504 We do this automatically for the cost model, since we calculate the cost
1505 for every peeling option. */
1506 if (!flag_vect_cost_model)
1507 possible_npeel_number = vf /nelements;
1509 /* Handle the aligned case. We may decide to align some other
1510 access, making DR unaligned. */
1511 if (DR_MISALIGNMENT (dr) == 0)
1513 npeel_tmp = 0;
1514 if (!flag_vect_cost_model)
1515 possible_npeel_number++;
1518 for (j = 0; j < possible_npeel_number; j++)
1520 gcc_assert (npeel_tmp <= vf);
1521 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1522 npeel_tmp += nelements;
1525 all_misalignments_unknown = false;
1526 /* Data-ref that was chosen for the case that all the
1527 misalignments are unknown is not relevant anymore, since we
1528 have a data-ref with known alignment. */
1529 dr0 = NULL;
1531 else
1533 /* If we don't know all the misalignment values, we prefer
1534 peeling for the data-ref that has the maximum number of data-refs
1535 with the same alignment, unless the target prefers to align
1536 stores over loads. */
1537 if (all_misalignments_unknown)
1539 if (same_align_drs_max < VEC_length (dr_p,
1540 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1541 || !dr0)
1543 same_align_drs_max = VEC_length (dr_p,
1544 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1545 dr0 = dr;
1548 if (!first_store && DR_IS_WRITE (dr))
1549 first_store = dr;
1552 /* If there are both known and unknown misaligned accesses in the
1553 loop, we choose peeling amount according to the known
1554 accesses. */
1557 if (!supportable_dr_alignment)
1559 dr0 = dr;
1560 if (!first_store && DR_IS_WRITE (dr))
1561 first_store = dr;
1565 else
1567 if (!aligned_access_p (dr))
1569 if (vect_print_dump_info (REPORT_DETAILS))
1570 fprintf (vect_dump, "vector alignment may not be reachable");
1572 break;
1577 vect_versioning_for_alias_required
1578 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1580 /* Temporarily, if versioning for alias is required, we disable peeling
1581 until we support peeling and versioning. Often peeling for alignment
1582 will require peeling for loop-bound, which in turn requires that we
1583 know how to adjust the loop ivs after the loop. */
1584 if (vect_versioning_for_alias_required
1585 || !vect_can_advance_ivs_p (loop_vinfo)
1586 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1587 do_peeling = false;
1589 if (do_peeling && all_misalignments_unknown
1590 && vect_supportable_dr_alignment (dr0, false))
1593 /* Check if the target requires to prefer stores over loads, i.e., if
1594 misaligned stores are more expensive than misaligned loads (taking
1595 drs with same alignment into account). */
1596 if (first_store && DR_IS_READ (dr0))
1598 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1599 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1600 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1601 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1603 vect_get_data_access_cost (dr0, &load_inside_cost,
1604 &load_outside_cost);
1605 vect_get_data_access_cost (first_store, &store_inside_cost,
1606 &store_outside_cost);
1608 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1609 aligning the load DR0). */
1610 load_inside_penalty = store_inside_cost;
1611 load_outside_penalty = store_outside_cost;
1612 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1613 (vinfo_for_stmt (DR_STMT (first_store))),
1614 i, dr);
1615 i++)
1616 if (DR_IS_READ (dr))
1618 load_inside_penalty += load_inside_cost;
1619 load_outside_penalty += load_outside_cost;
1621 else
1623 load_inside_penalty += store_inside_cost;
1624 load_outside_penalty += store_outside_cost;
1627 /* Calculate the penalty for leaving DR0 unaligned (by
1628 aligning the FIRST_STORE). */
1629 store_inside_penalty = load_inside_cost;
1630 store_outside_penalty = load_outside_cost;
1631 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1632 (vinfo_for_stmt (DR_STMT (dr0))),
1633 i, dr);
1634 i++)
1635 if (DR_IS_READ (dr))
1637 store_inside_penalty += load_inside_cost;
1638 store_outside_penalty += load_outside_cost;
1640 else
1642 store_inside_penalty += store_inside_cost;
1643 store_outside_penalty += store_outside_cost;
1646 if (load_inside_penalty > store_inside_penalty
1647 || (load_inside_penalty == store_inside_penalty
1648 && load_outside_penalty > store_outside_penalty))
1649 dr0 = first_store;
1652 /* In case there are only loads with different unknown misalignments, use
1653 peeling only if it may help to align other accesses in the loop. */
1654 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1655 (vinfo_for_stmt (DR_STMT (dr0))))
1656 && vect_supportable_dr_alignment (dr0, false)
1657 != dr_unaligned_supported)
1658 do_peeling = false;
1661 if (do_peeling && !dr0)
1663 /* Peeling is possible, but there is no data access that is unsupported
1664 when unaligned. So we try to choose the best possible peeling. */
1666 /* We should get here only if there are drs with known misalignment. */
1667 gcc_assert (!all_misalignments_unknown);
1669 /* Choose the best peeling from the hash table. */
1670 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1671 if (!dr0 || !npeel)
1672 do_peeling = false;
1675 if (do_peeling)
1677 stmt = DR_STMT (dr0);
1678 stmt_info = vinfo_for_stmt (stmt);
1679 vectype = STMT_VINFO_VECTYPE (stmt_info);
1680 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1682 if (known_alignment_for_access_p (dr0))
1684 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1685 size_zero_node) < 0;
1686 if (!npeel)
1688 /* Since it's known at compile time, compute the number of
1689 iterations in the peeled loop (the peeling factor) for use in
1690 updating DR_MISALIGNMENT values. The peeling factor is the
1691 vectorization factor minus the misalignment as an element
1692 count. */
1693 mis = DR_MISALIGNMENT (dr0);
1694 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1695 npeel = ((negative ? mis - nelements : nelements - mis)
1696 & (nelements - 1));
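/* Illustrative numbers (hypothetical DR0): for a forward V4SI access
   misaligned by 8 bytes, MIS is 8 / 4 == 2 elements and
   NPEEL == (4 - 2) & 3 == 2 peeled scalar iterations.  */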
1699 /* For interleaved data access every iteration accesses all the
1700 members of the group, therefore we divide the number of iterations
1701 by the group size. */
1702 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1703 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1704 npeel /= DR_GROUP_SIZE (stmt_info);
1706 if (vect_print_dump_info (REPORT_DETAILS))
1707 fprintf (vect_dump, "Try peeling by %d", npeel);
1710 /* Ensure that all data refs can be vectorized after the peel. */
1711 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1713 int save_misalignment;
1715 if (dr == dr0)
1716 continue;
1718 stmt = DR_STMT (dr);
1719 stmt_info = vinfo_for_stmt (stmt);
1720 /* For interleaving, only the alignment of the first access
1721 matters. */
1722 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1723 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1724 continue;
1726 save_misalignment = DR_MISALIGNMENT (dr);
1727 vect_update_misalignment_for_peel (dr, dr0, npeel);
1728 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1729 SET_DR_MISALIGNMENT (dr, save_misalignment);
1731 if (!supportable_dr_alignment)
1733 do_peeling = false;
1734 break;
1738 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1740 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1741 if (!stat)
1742 do_peeling = false;
1743 else
1744 return stat;
1747 if (do_peeling)
1749 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1750 If the misalignment of DR_i is identical to that of dr0 then set
1751 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1752 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1753 by the peeling factor times the element size of DR_i (MOD the
1754 vectorization factor times the size). Otherwise, the
1755 misalignment of DR_i must be set to unknown. */
1756 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1757 if (dr != dr0)
1758 vect_update_misalignment_for_peel (dr, dr0, npeel);
1760 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1761 if (npeel)
1762 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1763 else
1764 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1765 SET_DR_MISALIGNMENT (dr0, 0);
1766 if (vect_print_dump_info (REPORT_ALIGNMENT))
1767 fprintf (vect_dump, "Alignment of access forced using peeling.");
1769 if (vect_print_dump_info (REPORT_DETAILS))
1770 fprintf (vect_dump, "Peeling for alignment will be applied.");
1772 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1773 gcc_assert (stat);
1774 return stat;
1779 /* (2) Versioning to force alignment. */
1781 /* Try versioning if:
1782 1) flag_tree_vect_loop_version is TRUE
1783 2) optimize loop for speed
1784 3) there is at least one unsupported misaligned data ref with an unknown
1785 misalignment, and
1786 4) all misaligned data refs with a known misalignment are supported, and
1787 5) the number of runtime alignment checks is within reason. */
1789 do_versioning =
1790 flag_tree_vect_loop_version
1791 && optimize_loop_nest_for_speed_p (loop)
1792 && (!loop->inner); /* FORNOW */
1794 if (do_versioning)
1796 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1798 stmt = DR_STMT (dr);
1799 stmt_info = vinfo_for_stmt (stmt);
1801 /* For interleaving, only the alignment of the first access
1802 matters. */
1803 if (aligned_access_p (dr)
1804 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1805 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1806 continue;
1808 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1810 if (!supportable_dr_alignment)
1812 gimple stmt;
1813 int mask;
1814 tree vectype;
1816 if (known_alignment_for_access_p (dr)
1817 || VEC_length (gimple,
1818 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1819 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1821 do_versioning = false;
1822 break;
1825 stmt = DR_STMT (dr);
1826 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1827 gcc_assert (vectype);
1829 /* The rightmost bits of an aligned address must be zeros.
1830 Construct the mask needed for this test. For example,
1831 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1832 mask must be 15 = 0xf. */
1833 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1835 /* FORNOW: use the same mask to test all potentially unaligned
1836 references in the loop. The vectorizer currently supports
1837 a single vector size, see the reference to
1838 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1839 vectorization factor is computed. */
1840 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1841 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1842 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1843 VEC_safe_push (gimple, heap,
1844 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1845 DR_STMT (dr));
1849 /* Versioning requires at least one misaligned data reference. */
1850 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1851 do_versioning = false;
1852 else if (!do_versioning)
1853 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1856 if (do_versioning)
1858 VEC(gimple,heap) *may_misalign_stmts
1859 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1860 gimple stmt;
1862 /* It can now be assumed that the data references in the statements
1863 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1864 of the loop being vectorized. */
1865 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1867 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1868 dr = STMT_VINFO_DATA_REF (stmt_info);
1869 SET_DR_MISALIGNMENT (dr, 0);
1870 if (vect_print_dump_info (REPORT_ALIGNMENT))
1871 fprintf (vect_dump, "Alignment of access forced using versioning.");
1874 if (vect_print_dump_info (REPORT_DETAILS))
1875 fprintf (vect_dump, "Versioning for alignment will be applied.");
1877 /* Peeling and versioning can't be done together at this time. */
1878 gcc_assert (! (do_peeling && do_versioning));
1880 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1881 gcc_assert (stat);
1882 return stat;
1885 /* This point is reached if neither peeling nor versioning is being done. */
1886 gcc_assert (! (do_peeling || do_versioning));
1888 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1889 return stat;
1893 /* Function vect_find_same_alignment_drs.
1895 Update group and alignment relations according to the chosen
1896 vectorization factor. */
1898 static void
1899 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1900 loop_vec_info loop_vinfo)
1902 unsigned int i;
1903 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1904 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1905 struct data_reference *dra = DDR_A (ddr);
1906 struct data_reference *drb = DDR_B (ddr);
1907 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1908 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1909 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1910 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1911 lambda_vector dist_v;
1912 unsigned int loop_depth;
1914 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1915 return;
1917 if (dra == drb)
1918 return;
1920 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1921 return;
1923 /* Loop-based vectorization and known data dependence. */
1924 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1925 return;
1927 /* Data-dependence analysis reports a distance vector of zero
1928 for data-references that overlap only in the first iteration
1929 but have steps of different sign (see PR45764).
1930 So as a sanity check require equal DR_STEP. */
1931 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1932 return;
1934 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1935 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1937 int dist = dist_v[loop_depth];
1939 if (vect_print_dump_info (REPORT_DR_DETAILS))
1940 fprintf (vect_dump, "dependence distance = %d.", dist);
1942 /* Same loop iteration. */
1943 if (dist == 0
1944 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1946 /* Two references with distance zero have the same alignment. */
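/* For example, with a vectorization factor of 4, the int references
   a[i] and a[i+4] have dependence distance 4, which is 0 modulo the
   vectorization factor, so they are recorded as same-aligned.  */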
1947 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1948 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1949 if (vect_print_dump_info (REPORT_ALIGNMENT))
1950 fprintf (vect_dump, "accesses have the same alignment.");
1951 if (vect_print_dump_info (REPORT_DR_DETAILS))
1953 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1954 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1955 fprintf (vect_dump, " and ");
1956 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1963 /* Function vect_analyze_data_refs_alignment
1965 Analyze the alignment of the data-references in the loop.
1966 Return FALSE if a data reference is found that cannot be vectorized. */
1968 bool
1969 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1970 bb_vec_info bb_vinfo)
1972 if (vect_print_dump_info (REPORT_DETAILS))
1973 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1975 /* Mark groups of data references with the same alignment using
1976 data dependence information. */
1977 if (loop_vinfo)
1979 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1980 struct data_dependence_relation *ddr;
1981 unsigned int i;
1983 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
1984 vect_find_same_alignment_drs (ddr, loop_vinfo);
1987 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1989 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1990 fprintf (vect_dump,
1991 "not vectorized: can't calculate alignment for data ref.");
1992 return false;
1995 return true;
1999 /* Analyze groups of strided accesses: check that DR belongs to a group of
2000 strided accesses of legal size, step, etc. Detect gaps, single element
2001 interleaving, and other special cases. Set strided access info.
2002 Collect groups of strided stores for further use in SLP analysis. */
2004 static bool
2005 vect_analyze_group_access (struct data_reference *dr)
2007 tree step = DR_STEP (dr);
2008 tree scalar_type = TREE_TYPE (DR_REF (dr));
2009 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2010 gimple stmt = DR_STMT (dr);
2011 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2012 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2013 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2014 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2015 HOST_WIDE_INT stride;
2016 bool slp_impossible = false;
2018 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2019 interleaving group (including gaps). */
2020 stride = dr_step / type_size;
2022 /* A non-consecutive access is possible only if it is part of interleaving. */
2023 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2025 /* Check whether this DR is a part of interleaving, and is a single
2026 element of the group that is accessed in the loop. */
2028 /* Gaps are supported only for loads. STEP must be a multiple of the type
2029 size. The size of the group must be a power of 2. */
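/* For example, a loop that only reads a[2*i] from an array of 4-byte
   ints, as in

     for (i = 0; i < n; i++)
       sum += a[2*i];

   has dr_step == 8 and type_size == 4, hence stride == 2, and the read
   is treated as a single-element interleaving group of size 2.  */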
2030 if (DR_IS_READ (dr)
2031 && (dr_step % type_size) == 0
2032 && stride > 0
2033 && exact_log2 (stride) != -1)
2035 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2036 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2037 if (vect_print_dump_info (REPORT_DR_DETAILS))
2039 fprintf (vect_dump, "Detected single element interleaving ");
2040 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2041 fprintf (vect_dump, " step ");
2042 print_generic_expr (vect_dump, step, TDF_SLIM);
2044 return true;
2047 if (vect_print_dump_info (REPORT_DETAILS))
2049 fprintf (vect_dump, "not consecutive access ");
2050 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2053 if (bb_vinfo)
2055 /* Mark the statement as unvectorizable. */
2056 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2057 return true;
2060 return false;
2063 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2065 /* First stmt in the interleaving chain. Check the chain. */
2066 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2067 struct data_reference *data_ref = dr;
2068 unsigned int count = 1;
2069 tree next_step;
2070 tree prev_init = DR_INIT (data_ref);
2071 gimple prev = stmt;
2072 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2074 while (next)
2076 /* Skip identical data-refs. If two or more stmts share a
2077 data-ref (supported only for loads), we vectorize only the first
2078 stmt, and the rest get their vectorized loads from the first
2079 one. */
2080 if (!tree_int_cst_compare (DR_INIT (data_ref),
2081 DR_INIT (STMT_VINFO_DATA_REF (
2082 vinfo_for_stmt (next)))))
2084 if (DR_IS_WRITE (data_ref))
2086 if (vect_print_dump_info (REPORT_DETAILS))
2087 fprintf (vect_dump, "Two store stmts share the same dr.");
2088 return false;
2091 /* Check that there are no load-store dependences for these loads,
2092 to prevent a load-store-load sequence to the same location. */
2093 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2094 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2096 if (vect_print_dump_info (REPORT_DETAILS))
2097 fprintf (vect_dump,
2098 "READ_WRITE dependence in interleaving.");
2099 return false;
2102 /* For load use the same data-ref load. */
2103 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2105 prev = next;
2106 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2107 continue;
2109 prev = next;
2111 /* Check that all the accesses have the same STEP. */
2112 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2113 if (tree_int_cst_compare (step, next_step))
2115 if (vect_print_dump_info (REPORT_DETAILS))
2116 fprintf (vect_dump, "not consecutive access in interleaving");
2117 return false;
2120 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2121 /* Check that the distance between two accesses is equal to the type
2122 size. Otherwise, we have gaps. */
2123 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2124 - TREE_INT_CST_LOW (prev_init)) / type_size;
2125 if (diff != 1)
2127 /* FORNOW: SLP of accesses with gaps is not supported. */
2128 slp_impossible = true;
2129 if (DR_IS_WRITE (data_ref))
2131 if (vect_print_dump_info (REPORT_DETAILS))
2132 fprintf (vect_dump, "interleaved store with gaps");
2133 return false;
2136 gaps += diff - 1;
2139 /* Store the gap from the previous member of the group. If there is no
2140 gap in the access, DR_GROUP_GAP is always 1. */
2141 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2143 prev_init = DR_INIT (data_ref);
2144 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2145 /* Count the number of data-refs in the chain. */
2146 count++;
2149 /* COUNT is the number of accesses found; we multiply it by the size of
2150 the type to get COUNT_IN_BYTES. */
2151 count_in_bytes = type_size * count;
2153 /* Check that the size of the interleaving (including gaps) is not
2154 greater than STEP. */
2155 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2157 if (vect_print_dump_info (REPORT_DETAILS))
2159 fprintf (vect_dump, "interleaving size is greater than step for ");
2160 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2162 return false;
2165 /* Check that the size of the interleaving is equal to STEP for stores,
2166 i.e., that there are no gaps. */
2167 if (dr_step && dr_step != count_in_bytes)
2169 if (DR_IS_READ (dr))
2171 slp_impossible = true;
2172 /* There is a gap after the last load in the group. This gap is a
2173 difference between the stride and the number of elements. When
2174 there is no gap, this difference should be 0. */
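/* For example, if the step covers stride == 4 elements but only
   count == 3 loads are present in the group, the trailing gap is
   4 - 3 == 1 element.  */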
2175 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2177 else
2179 if (vect_print_dump_info (REPORT_DETAILS))
2180 fprintf (vect_dump, "interleaved store with gaps");
2181 return false;
2185 /* Check that STEP is a multiple of type size. */
2186 if (dr_step && (dr_step % type_size) != 0)
2188 if (vect_print_dump_info (REPORT_DETAILS))
2190 fprintf (vect_dump, "step is not a multiple of type size: step ");
2191 print_generic_expr (vect_dump, step, TDF_SLIM);
2192 fprintf (vect_dump, " size ");
2193 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2194 TDF_SLIM);
2196 return false;
2199 /* FORNOW: we handle only interleaving that is a power of 2.
2200 We don't fail here if it may still be possible to vectorize the
2201 group using SLP. If not, the size of the group will be checked in
2202 vect_analyze_operations, and the vectorization will fail. */
2203 if (exact_log2 (stride) == -1)
2205 if (vect_print_dump_info (REPORT_DETAILS))
2206 fprintf (vect_dump, "interleaving is not a power of 2");
2208 if (slp_impossible)
2209 return false;
2212 if (stride == 0)
2213 stride = count;
2215 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2216 if (vect_print_dump_info (REPORT_DETAILS))
2217 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2219 /* SLP: create an SLP data structure for every interleaving group of
2220 stores for further analysis in vect_analyze_slp. */
2221 if (DR_IS_WRITE (dr) && !slp_impossible)
2223 if (loop_vinfo)
2224 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2225 stmt);
2226 if (bb_vinfo)
2227 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2228 stmt);
2232 return true;
2236 /* Analyze the access pattern of the data-reference DR.
2237 In case of non-consecutive accesses call vect_analyze_group_access() to
2238 analyze groups of strided accesses. */
2240 static bool
2241 vect_analyze_data_ref_access (struct data_reference *dr)
2243 tree step = DR_STEP (dr);
2244 tree scalar_type = TREE_TYPE (DR_REF (dr));
2245 gimple stmt = DR_STMT (dr);
2246 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2247 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2248 struct loop *loop = NULL;
2249 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2251 if (loop_vinfo)
2252 loop = LOOP_VINFO_LOOP (loop_vinfo);
2254 if (loop_vinfo && !step)
2256 if (vect_print_dump_info (REPORT_DETAILS))
2257 fprintf (vect_dump, "bad data-ref access in loop");
2258 return false;
2261 /* Don't allow invariant accesses in loops. */
2262 if (loop_vinfo && dr_step == 0)
2263 return false;
2265 if (loop && nested_in_vect_loop_p (loop, stmt))
2267 /* Interleaved accesses are not yet supported within outer-loop
2268 vectorization for references in the inner-loop. */
2269 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2271 /* For the rest of the analysis we use the outer-loop step. */
2272 step = STMT_VINFO_DR_STEP (stmt_info);
2273 dr_step = TREE_INT_CST_LOW (step);
2275 if (dr_step == 0)
2277 if (vect_print_dump_info (REPORT_ALIGNMENT))
2278 fprintf (vect_dump, "zero step in outer loop.");
2279 if (DR_IS_READ (dr))
2280 return true;
2281 else
2282 return false;
2286 /* Consecutive? */
2287 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2288 || (dr_step < 0
2289 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2291 /* Mark that it is not interleaving. */
2292 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2293 return true;
2296 if (loop && nested_in_vect_loop_p (loop, stmt))
2298 if (vect_print_dump_info (REPORT_ALIGNMENT))
2299 fprintf (vect_dump, "strided access in outer loop.");
2300 return false;
2303 /* Not a consecutive access - check whether it is part of an interleaving group. */
2304 return vect_analyze_group_access (dr);
2308 /* Function vect_analyze_data_ref_accesses.
2310 Analyze the access pattern of all the data references in the loop.
2312 FORNOW: the only access pattern that is considered vectorizable is a
2313 simple step 1 (consecutive) access.
2315 FORNOW: handle only arrays and pointer accesses. */
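/* For instance, in

     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;

   both accesses advance by exactly one element per iteration and match the
   consecutive pattern, whereas an access such as a[3*i] is analyzed as a
   (possible) interleaving group by vect_analyze_group_access.  */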
2317 bool
2318 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2320 unsigned int i;
2321 VEC (data_reference_p, heap) *datarefs;
2322 struct data_reference *dr;
2324 if (vect_print_dump_info (REPORT_DETAILS))
2325 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2327 if (loop_vinfo)
2328 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2329 else
2330 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2332 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2333 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2334 && !vect_analyze_data_ref_access (dr))
2336 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2337 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2339 if (bb_vinfo)
2341 /* Mark the statement as not vectorizable. */
2342 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2343 continue;
2345 else
2346 return false;
2349 return true;
2352 /* Function vect_prune_runtime_alias_test_list.
2354 Prune a list of ddrs to be tested at run-time by versioning for alias.
2355 Return FALSE if the resulting list of ddrs is longer than allowed by
2356 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2358 bool
2359 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2361 VEC (ddr_p, heap) * ddrs =
2362 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2363 unsigned i, j;
2365 if (vect_print_dump_info (REPORT_DETAILS))
2366 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2368 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2370 bool found;
2371 ddr_p ddr_i;
2373 ddr_i = VEC_index (ddr_p, ddrs, i);
2374 found = false;
2376 for (j = 0; j < i; j++)
2378 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2380 if (vect_vfa_range_equal (ddr_i, ddr_j))
2382 if (vect_print_dump_info (REPORT_DR_DETAILS))
2384 fprintf (vect_dump, "found equal ranges ");
2385 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2386 fprintf (vect_dump, ", ");
2387 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2388 fprintf (vect_dump, " and ");
2389 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2390 fprintf (vect_dump, ", ");
2391 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2393 found = true;
2394 break;
2398 if (found)
2400 VEC_ordered_remove (ddr_p, ddrs, i);
2401 continue;
2403 i++;
2406 if (VEC_length (ddr_p, ddrs) >
2407 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2409 if (vect_print_dump_info (REPORT_DR_DETAILS))
2411 fprintf (vect_dump,
2412 "disable versioning for alias - max number of generated "
2413 "checks exceeded.");
2416 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2418 return false;
2421 return true;
2425 /* Function vect_analyze_data_refs.
2427 Find all the data references in the loop or basic block.
2429 The general structure of the analysis of data refs in the vectorizer is as
2430 follows:
2431 1- vect_analyze_data_refs(loop/bb): call
2432 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2433 in the loop/bb and their dependences.
2434 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2435 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2436 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2440 bool
2441 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2442 bb_vec_info bb_vinfo,
2443 int *min_vf)
2445 struct loop *loop = NULL;
2446 basic_block bb = NULL;
2447 unsigned int i;
2448 VEC (data_reference_p, heap) *datarefs;
2449 struct data_reference *dr;
2450 tree scalar_type;
2451 bool res;
2453 if (vect_print_dump_info (REPORT_DETAILS))
2454 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2456 if (loop_vinfo)
2458 loop = LOOP_VINFO_LOOP (loop_vinfo);
2459 res = compute_data_dependences_for_loop
2460 (loop, true,
2461 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2462 &LOOP_VINFO_DATAREFS (loop_vinfo),
2463 &LOOP_VINFO_DDRS (loop_vinfo));
2465 if (!res)
2467 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2468 fprintf (vect_dump, "not vectorized: loop contains function calls"
2469 " or data references that cannot be analyzed");
2470 return false;
2473 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2475 else
2477 bb = BB_VINFO_BB (bb_vinfo);
2478 res = compute_data_dependences_for_bb (bb, true,
2479 &BB_VINFO_DATAREFS (bb_vinfo),
2480 &BB_VINFO_DDRS (bb_vinfo));
2481 if (!res)
2483 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2484 fprintf (vect_dump, "not vectorized: basic block contains function"
2485 " calls or data references that cannot be analyzed");
2486 return false;
2489 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2492 /* Go through the data-refs, check that the analysis succeeded. Update
2493 pointer from stmt_vec_info struct to DR and vectype. */
2495 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2497 gimple stmt;
2498 stmt_vec_info stmt_info;
2499 tree base, offset, init;
2500 int vf;
2502 if (!dr || !DR_REF (dr))
2504 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2505 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2506 return false;
2509 stmt = DR_STMT (dr);
2510 stmt_info = vinfo_for_stmt (stmt);
2512 /* Check that analysis of the data-ref succeeded. */
2513 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2514 || !DR_STEP (dr))
2516 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2518 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2519 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2522 if (bb_vinfo)
2524 /* Mark the statement as not vectorizable. */
2525 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2526 continue;
2528 else
2529 return false;
2532 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2534 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2535 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2536 "constant");
2537 if (bb_vinfo)
2539 /* Mark the statement as not vectorizable. */
2540 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2541 continue;
2543 else
2544 return false;
2547 base = unshare_expr (DR_BASE_ADDRESS (dr));
2548 offset = unshare_expr (DR_OFFSET (dr));
2549 init = unshare_expr (DR_INIT (dr));
2551 if (stmt_can_throw_internal (stmt))
2553 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2555 fprintf (vect_dump, "not vectorized: statement can throw an "
2556 "exception ");
2557 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2559 return false;
2562 /* Update DR field in stmt_vec_info struct. */
2564 /* If the dataref is in an inner-loop of the loop that is considered for
2565 vectorization, we also want to analyze the access relative to
2566 the outer-loop (DR contains information only relative to the
2567 inner-most enclosing loop). We do that by building a reference to the
2568 first location accessed by the inner-loop, and analyze it relative to
2569 the outer-loop. */
2570 if (loop && nested_in_vect_loop_p (loop, stmt))
2572 tree outer_step, outer_base, outer_init;
2573 HOST_WIDE_INT pbitsize, pbitpos;
2574 tree poffset;
2575 enum machine_mode pmode;
2576 int punsignedp, pvolatilep;
2577 affine_iv base_iv, offset_iv;
2578 tree dinit;
2580 /* Build a reference to the first location accessed by the
2581 inner-loop: *(BASE+INIT). (The first location is actually
2582 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2583 tree inner_base = build_fold_indirect_ref
2584 (fold_build2 (POINTER_PLUS_EXPR,
2585 TREE_TYPE (base), base,
2586 fold_convert (sizetype, init)));
2588 if (vect_print_dump_info (REPORT_DETAILS))
2590 fprintf (vect_dump, "analyze in outer-loop: ");
2591 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2594 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2595 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2596 gcc_assert (outer_base != NULL_TREE);
2598 if (pbitpos % BITS_PER_UNIT != 0)
2600 if (vect_print_dump_info (REPORT_DETAILS))
2601 fprintf (vect_dump, "failed: bit offset alignment.\n");
2602 return false;
2605 outer_base = build_fold_addr_expr (outer_base);
2606 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2607 &base_iv, false))
2609 if (vect_print_dump_info (REPORT_DETAILS))
2610 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2611 return false;
2614 if (offset)
2616 if (poffset)
2617 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2618 poffset);
2619 else
2620 poffset = offset;
2623 if (!poffset)
2625 offset_iv.base = ssize_int (0);
2626 offset_iv.step = ssize_int (0);
2628 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2629 &offset_iv, false))
2631 if (vect_print_dump_info (REPORT_DETAILS))
2632 fprintf (vect_dump, "evolution of offset is not affine.\n");
2633 return false;
2636 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2637 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2638 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2639 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2640 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2642 outer_step = size_binop (PLUS_EXPR,
2643 fold_convert (ssizetype, base_iv.step),
2644 fold_convert (ssizetype, offset_iv.step));
2646 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2647 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2648 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2649 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2650 STMT_VINFO_DR_OFFSET (stmt_info) =
2651 fold_convert (ssizetype, offset_iv.base);
2652 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2653 size_int (highest_pow2_factor (offset_iv.base));
2655 if (vect_print_dump_info (REPORT_DETAILS))
2657 fprintf (vect_dump, "\touter base_address: ");
2658 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2659 fprintf (vect_dump, "\n\touter offset from base address: ");
2660 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2661 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2662 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2663 fprintf (vect_dump, "\n\touter step: ");
2664 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2665 fprintf (vect_dump, "\n\touter aligned to: ");
2666 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2670 if (STMT_VINFO_DATA_REF (stmt_info))
2672 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2674 fprintf (vect_dump,
2675 "not vectorized: more than one data ref in stmt: ");
2676 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2678 return false;
2681 STMT_VINFO_DATA_REF (stmt_info) = dr;
2683 /* Set vectype for STMT. */
2684 scalar_type = TREE_TYPE (DR_REF (dr));
2685 STMT_VINFO_VECTYPE (stmt_info) =
2686 get_vectype_for_scalar_type (scalar_type);
2687 if (!STMT_VINFO_VECTYPE (stmt_info))
2689 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2691 fprintf (vect_dump,
2692 "not vectorized: no vectype for stmt: ");
2693 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2694 fprintf (vect_dump, " scalar_type: ");
2695 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2698 if (bb_vinfo)
2700 /* Mark the statement as not vectorizable. */
2701 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2702 continue;
2704 else
2705 return false;
2708 /* Adjust the minimal vectorization factor according to the
2709 vector type. */
2710 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2711 if (vf > *min_vf)
2712 *min_vf = vf;
2715 return true;
2719 /* Function vect_get_new_vect_var.
2721 Returns a name for a new variable. The current naming scheme prepends
2722 the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
2723 VAR_KIND) to NAME if provided, and otherwise names the new variable
2724 using the prefix alone. */
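/* For example, vect_get_new_vect_var (ptr_type, vect_pointer_var, "in")
   bases the new temporary's name on "vect_pin", while a NULL NAME falls
   back to the bare "vect_p" prefix.  */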
2726 tree
2727 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2729 const char *prefix;
2730 tree new_vect_var;
2732 switch (var_kind)
2734 case vect_simple_var:
2735 prefix = "vect_";
2736 break;
2737 case vect_scalar_var:
2738 prefix = "stmp_";
2739 break;
2740 case vect_pointer_var:
2741 prefix = "vect_p";
2742 break;
2743 default:
2744 gcc_unreachable ();
2747 if (name)
2749 char* tmp = concat (prefix, name, NULL);
2750 new_vect_var = create_tmp_var (type, tmp);
2751 free (tmp);
2753 else
2754 new_vect_var = create_tmp_var (type, prefix);
2756 /* Mark vector typed variable as a gimple register variable. */
2757 if (TREE_CODE (type) == VECTOR_TYPE)
2758 DECL_GIMPLE_REG_P (new_vect_var) = true;
2760 return new_vect_var;
2764 /* Function vect_create_addr_base_for_vector_ref.
2766 Create an expression that computes the address of the first memory location
2767 that will be accessed for a data reference.
2769 Input:
2770 STMT: The statement containing the data reference.
2771 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2772 OFFSET: Optional. If supplied, it is added to the initial address.
2773 LOOP: Specify the loop-nest relative to which the address should be computed.
2774 For example, when the dataref is in an inner-loop nested in an
2775 outer-loop that is now being vectorized, LOOP can be either the
2776 outer-loop, or the inner-loop. The first memory location accessed
2777 by the following dataref ('in' points to short):
2779 for (i=0; i<N; i++)
2780 for (j=0; j<M; j++)
2781 s += in[i+j]
2783 is as follows:
2784 if LOOP=i_loop: &in (relative to i_loop)
2785 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2787 Output:
2788 1. Return an SSA_NAME whose value is the address of the memory location of
2789 the first vector of the data reference.
2790 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2791 these statement(s) which define the returned SSA_NAME.
2793 FORNOW: We are only handling array accesses with step 1. */
2795 tree
2796 vect_create_addr_base_for_vector_ref (gimple stmt,
2797 gimple_seq *new_stmt_list,
2798 tree offset,
2799 struct loop *loop)
2801 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2802 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2803 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2804 tree base_name;
2805 tree data_ref_base_var;
2806 tree vec_stmt;
2807 tree addr_base, addr_expr;
2808 tree dest;
2809 gimple_seq seq = NULL;
2810 tree base_offset = unshare_expr (DR_OFFSET (dr));
2811 tree init = unshare_expr (DR_INIT (dr));
2812 tree vect_ptr_type;
2813 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2814 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2815 tree base;
2817 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2819 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2821 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2823 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2824 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2825 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2828 if (loop_vinfo)
2829 base_name = build_fold_indirect_ref (data_ref_base);
2830 else
2832 base_offset = ssize_int (0);
2833 init = ssize_int (0);
2834 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2837 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2838 add_referenced_var (data_ref_base_var);
2839 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2840 data_ref_base_var);
2841 gimple_seq_add_seq (new_stmt_list, seq);
2843 /* Create base_offset */
2844 base_offset = size_binop (PLUS_EXPR,
2845 fold_convert (sizetype, base_offset),
2846 fold_convert (sizetype, init));
2847 dest = create_tmp_var (sizetype, "base_off");
2848 add_referenced_var (dest);
2849 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2850 gimple_seq_add_seq (new_stmt_list, seq);
2852 if (offset)
2854 tree tmp = create_tmp_var (sizetype, "offset");
2856 add_referenced_var (tmp);
2857 offset = fold_build2 (MULT_EXPR, sizetype,
2858 fold_convert (sizetype, offset), step);
2859 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2860 base_offset, offset);
2861 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2862 gimple_seq_add_seq (new_stmt_list, seq);
2865 /* base + base_offset */
2866 if (loop_vinfo)
2867 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2868 data_ref_base, base_offset);
2869 else
2871 addr_base = build1 (ADDR_EXPR,
2872 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2873 unshare_expr (DR_REF (dr)));
2876 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2877 base = get_base_address (DR_REF (dr));
2878 if (base
2879 && TREE_CODE (base) == MEM_REF)
2880 vect_ptr_type
2881 = build_qualified_type (vect_ptr_type,
2882 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2884 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2885 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2886 get_name (base_name));
2887 add_referenced_var (addr_expr);
2888 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2889 gimple_seq_add_seq (new_stmt_list, seq);
2891 if (DR_PTR_INFO (dr)
2892 && TREE_CODE (vec_stmt) == SSA_NAME)
2894 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2895 if (offset)
2897 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2898 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2902 if (vect_print_dump_info (REPORT_DETAILS))
2904 fprintf (vect_dump, "created ");
2905 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2908 return vec_stmt;
2912 /* Function vect_create_data_ref_ptr.
2914 Create a new pointer to vector type (vp), that points to the first location
2915 accessed in the loop by STMT, along with the def-use update chain to
2916 appropriately advance the pointer through the loop iterations. Also set
2917 aliasing information for the pointer. This vector pointer is used by the
2918 callers to this function to create a memory reference expression for vector
2919 load/store access.
2921 Input:
2922 1. STMT: a stmt that references memory. Expected to be of the form
2923 GIMPLE_ASSIGN <name, data-ref> or
2924 GIMPLE_ASSIGN <data-ref, name>.
2925 2. AT_LOOP: the loop where the vector memref is to be created.
2926 3. OFFSET (optional): an offset to be added to the initial address accessed
2927 by the data-ref in STMT.
2928 4. BSI: location where the new stmts are to be placed if there is no loop
2929 5. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2930 pointing to the initial address.
2931 6. TYPE: if not NULL indicates the required type of the data-ref.
2933 Output:
2934 1. Declare a new ptr to vector_type, and have it point to the base of the
2935 data reference (the initial address accessed by the data reference).
2936 For example, for vector of type V8HI, the following code is generated:
2938 v8hi *vp;
2939 vp = (v8hi *)initial_address;
2941 if OFFSET is not supplied:
2942 initial_address = &a[init];
2943 if OFFSET is supplied:
2944 initial_address = &a[init + OFFSET];
2946 Return the initial_address in INITIAL_ADDRESS.
2948 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2949 update the pointer in each iteration of the loop.
2951 Return the increment stmt that updates the pointer in PTR_INCR.
2953 3. Set INV_P to true if the access pattern of the data reference in the
2954 vectorized loop is invariant. Set it to false otherwise.
2956 4. Return the pointer. */
2958 tree
2959 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop, tree offset,
2960 tree *initial_address, gimple_stmt_iterator *gsi,
2961 gimple *ptr_incr, bool only_init, bool *inv_p)
2963 tree base_name;
2964 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2965 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2966 struct loop *loop = NULL;
2967 bool nested_in_vect_loop = false;
2968 struct loop *containing_loop = NULL;
2969 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2970 tree vect_ptr_type;
2971 tree vect_ptr;
2972 tree new_temp;
2973 gimple vec_stmt;
2974 gimple_seq new_stmt_list = NULL;
2975 edge pe = NULL;
2976 basic_block new_bb;
2977 tree vect_ptr_init;
2978 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2979 tree vptr;
2980 gimple_stmt_iterator incr_gsi;
2981 bool insert_after;
2982 bool negative;
2983 tree indx_before_incr, indx_after_incr;
2984 gimple incr;
2985 tree step;
2986 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2987 tree base;
2989 if (loop_vinfo)
2991 loop = LOOP_VINFO_LOOP (loop_vinfo);
2992 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2993 containing_loop = (gimple_bb (stmt))->loop_father;
2994 pe = loop_preheader_edge (loop);
2996 else
2998 gcc_assert (bb_vinfo);
2999 only_init = true;
3000 *ptr_incr = NULL;
3003 /* Check the step (evolution) of the load in LOOP, and record
3004 whether it's invariant. */
3005 if (nested_in_vect_loop)
3006 step = STMT_VINFO_DR_STEP (stmt_info);
3007 else
3008 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3010 if (tree_int_cst_compare (step, size_zero_node) == 0)
3011 *inv_p = true;
3012 else
3013 *inv_p = false;
3014 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3016 /* Create an expression for the first address accessed by this load
3017 in LOOP. */
3018 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3020 if (vect_print_dump_info (REPORT_DETAILS))
3022 tree data_ref_base = base_name;
3023 fprintf (vect_dump, "create vector-pointer variable to type: ");
3024 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3025 if (TREE_CODE (data_ref_base) == VAR_DECL
3026 || TREE_CODE (data_ref_base) == ARRAY_REF)
3027 fprintf (vect_dump, " vectorizing an array ref: ");
3028 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3029 fprintf (vect_dump, " vectorizing a record based array ref: ");
3030 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3031 fprintf (vect_dump, " vectorizing a pointer ref: ");
3032 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3035 /* (1) Create the new vector-pointer variable. */
3036 vect_ptr_type = build_pointer_type (vectype);
3037 base = get_base_address (DR_REF (dr));
3038 if (base
3039 && TREE_CODE (base) == MEM_REF)
3040 vect_ptr_type
3041 = build_qualified_type (vect_ptr_type,
3042 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3043 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3044 get_name (base_name));
3046 /* Vector types inherit the alias set of their component type by default so
3047 we need to use a ref-all pointer if the data reference does not conflict
3048 with the created vector data reference because it is not addressable. */
3049 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3050 get_alias_set (DR_REF (dr))))
3052 vect_ptr_type
3053 = build_pointer_type_for_mode (vectype,
3054 TYPE_MODE (vect_ptr_type), true);
3055 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3056 get_name (base_name));
3059 /* Likewise for any of the data references in the stmt group. */
3060 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
3062 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
3065 tree lhs = gimple_assign_lhs (orig_stmt);
3066 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3067 get_alias_set (lhs)))
3069 vect_ptr_type
3070 = build_pointer_type_for_mode (vectype,
3071 TYPE_MODE (vect_ptr_type), true);
3072 vect_ptr
3073 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3074 get_name (base_name));
3075 break;
3078 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
3080 while (orig_stmt);
3083 add_referenced_var (vect_ptr);
3085 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3086 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3087 def-use update cycles for the pointer: one relative to the outer-loop
3088 (LOOP), which is what steps (3) and (4) below do. The other is relative
3089 to the inner-loop (which is the inner-most loop containing the dataref),
3090 and this is done by step (5) below.
3092 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3093 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3094 redundant. Steps (3),(4) create the following:
3096 vp0 = &base_addr;
3097 LOOP: vp1 = phi(vp0,vp2)
3100 vp2 = vp1 + step
3101 goto LOOP
3103 If there is an inner-loop nested in loop, then step (5) will also be
3104 applied, and an additional update in the inner-loop will be created:
3106 vp0 = &base_addr;
3107 LOOP: vp1 = phi(vp0,vp2)
3109 inner: vp3 = phi(vp1,vp4)
3110 vp4 = vp3 + inner_step
3111 if () goto inner
3113 vp2 = vp1 + step
3114 if () goto LOOP */
3116 /* (2) Calculate the initial address of the vector-pointer, and set
3117 the vector-pointer to point to it before the loop. */
3119 /* Create: &(base[init_val+offset]) in the loop preheader. */
3121 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3122 offset, loop);
3123 if (new_stmt_list)
3125 if (pe)
3127 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3128 gcc_assert (!new_bb);
3130 else
3131 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3134 *initial_address = new_temp;
3136 /* Create: p = (vectype *) initial_base */
3137 if (TREE_CODE (new_temp) != SSA_NAME
3138 || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
3140 vec_stmt = gimple_build_assign (vect_ptr,
3141 fold_convert (vect_ptr_type, new_temp));
3142 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
3143 /* Copy the points-to information if it exists. */
3144 if (DR_PTR_INFO (dr))
3145 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
3146 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
3147 if (pe)
3149 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3150 gcc_assert (!new_bb);
3152 else
3153 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3155 else
3156 vect_ptr_init = new_temp;
3158 /* (3) Handle the updating of the vector-pointer inside the loop.
3159 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3160 inner-loop nested in LOOP (during outer-loop vectorization). */
3162 /* No update in loop is required. */
3163 if (only_init && (!loop_vinfo || at_loop == loop))
3164 vptr = vect_ptr_init;
3165 else
3167 /* The step of the vector pointer is the Vector Size. */
3168 tree step = TYPE_SIZE_UNIT (vectype);
3169 /* One exception to the above is when the scalar step of the load in
3170 LOOP is zero. In this case the step here is also zero. */
3171 if (*inv_p)
3172 step = size_zero_node;
3173 else if (negative)
3174 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3176 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3178 create_iv (vect_ptr_init,
3179 fold_convert (vect_ptr_type, step),
3180 vect_ptr, loop, &incr_gsi, insert_after,
3181 &indx_before_incr, &indx_after_incr);
3182 incr = gsi_stmt (incr_gsi);
3183 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3185 /* Copy the points-to information if it exists. */
3186 if (DR_PTR_INFO (dr))
3188 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3189 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3191 if (ptr_incr)
3192 *ptr_incr = incr;
3194 vptr = indx_before_incr;
3197 if (!nested_in_vect_loop || only_init)
3198 return vptr;
3201 /* (4) Handle the updating of the vector-pointer inside the inner-loop
3202 nested in LOOP, if it exists. */
3204 gcc_assert (nested_in_vect_loop);
3205 if (!only_init)
3207 standard_iv_increment_position (containing_loop, &incr_gsi,
3208 &insert_after);
3209 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
3210 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3211 &indx_after_incr);
3212 incr = gsi_stmt (incr_gsi);
3213 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3215 /* Copy the points-to information if it exists. */
3216 if (DR_PTR_INFO (dr))
3218 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3219 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3221 if (ptr_incr)
3222 *ptr_incr = incr;
3224 return indx_before_incr;
3226 else
3227 gcc_unreachable ();
3231 /* Function bump_vector_ptr
3233 Increment a pointer (to a vector type) by vector-size. If requested,
3234 i.e. if PTR_INCR is given, then also connect the new increment stmt
3235 to the existing def-use update-chain of the pointer, by modifying
3236 the PTR_INCR as illustrated below:
3238 The pointer def-use update-chain before this function:
3239 DATAREF_PTR = phi (p_0, p_2)
3240 ....
3241 PTR_INCR: p_2 = DATAREF_PTR + step
3243 The pointer def-use update-chain after this function:
3244 DATAREF_PTR = phi (p_0, p_2)
3245 ....
3246 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3247 ....
3248 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3250 Input:
3251 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3252 in the loop.
3253 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3254 the loop. The increment amount across iterations is expected
3255 to be vector_size.
3256 BSI - location where the new update stmt is to be placed.
3257 STMT - the original scalar memory-access stmt that is being vectorized.
3258 BUMP - optional. The offset by which to bump the pointer. If not given,
3259 the offset is assumed to be vector_size.
3261 Output: Return NEW_DATAREF_PTR as illustrated above.
3265 tree
3266 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3267 gimple stmt, tree bump)
3269 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3270 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3271 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3272 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3273 tree update = TYPE_SIZE_UNIT (vectype);
3274 gimple incr_stmt;
3275 ssa_op_iter iter;
3276 use_operand_p use_p;
3277 tree new_dataref_ptr;
3279 if (bump)
3280 update = bump;
3282 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3283 dataref_ptr, update);
3284 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3285 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3286 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3288 /* Copy the points-to information if it exists. */
3289 if (DR_PTR_INFO (dr))
3291 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3292 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3293 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3296 if (!ptr_incr)
3297 return new_dataref_ptr;
3299 /* Update the vector-pointer's cross-iteration increment. */
3300 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3302 tree use = USE_FROM_PTR (use_p);
3304 if (use == dataref_ptr)
3305 SET_USE (use_p, new_dataref_ptr);
3306 else
3307 gcc_assert (tree_int_cst_compare (use, update) == 0);
3310 return new_dataref_ptr;
3314 /* Function vect_create_destination_var.
3316 Create a new temporary of type VECTYPE. */
3318 tree
3319 vect_create_destination_var (tree scalar_dest, tree vectype)
3321 tree vec_dest;
3322 const char *new_name;
3323 tree type;
3324 enum vect_var_kind kind;
3326 kind = vectype ? vect_simple_var : vect_scalar_var;
3327 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3329 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3331 new_name = get_name (scalar_dest);
3332 if (!new_name)
3333 new_name = "var_";
3334 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3335 add_referenced_var (vec_dest);
3337 return vec_dest;
3340 /* Function vect_strided_store_supported.
3342 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3343 and FALSE otherwise. */
3345 bool
3346 vect_strided_store_supported (tree vectype)
3348 optab interleave_high_optab, interleave_low_optab;
3349 enum machine_mode mode;
3351 mode = TYPE_MODE (vectype);
3353 /* Check that the operation is supported. */
3354 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3355 vectype, optab_default);
3356 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3357 vectype, optab_default);
3358 if (!interleave_high_optab || !interleave_low_optab)
3360 if (vect_print_dump_info (REPORT_DETAILS))
3361 fprintf (vect_dump, "no optab for interleave.");
3362 return false;
3365 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3366 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3368 if (vect_print_dump_info (REPORT_DETAILS))
3369 fprintf (vect_dump, "interleave op not supported by target.");
3370 return false;
3373 return true;
3377 /* Function vect_permute_store_chain.
3379 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3380 a power of 2, generate interleave_high/low stmts to reorder the data
3381 correctly for the stores. Return the final references for stores in
3382 RESULT_CHAIN.
3384 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3385 The input is 4 vectors each containing 8 elements. We assign a number to
3386 each element, the input sequence is:
3388 1st vec: 0 1 2 3 4 5 6 7
3389 2nd vec: 8 9 10 11 12 13 14 15
3390 3rd vec: 16 17 18 19 20 21 22 23
3391 4th vec: 24 25 26 27 28 29 30 31
3393 The output sequence should be:
3395 1st vec: 0 8 16 24 1 9 17 25
3396 2nd vec: 2 10 18 26 3 11 19 27
3397 3rd vec: 4 12 20 28 5 13 21 29
3398 4th vec: 6 14 22 30 7 15 23 31
3400 i.e., we interleave the contents of the four vectors in their order.
3402 We use interleave_high/low instructions to create such output. The input of
3403 each interleave_high/low operation is two vectors:
3404 1st vec 2nd vec
3405 0 1 2 3 4 5 6 7
3406 the even elements of the result vector are obtained left-to-right from the
3407 high/low elements of the first vector. The odd elements of the result are
3408 obtained left-to-right from the high/low elements of the second vector.
3409 The output of interleave_high will be: 0 4 1 5
3410 and of interleave_low: 2 6 3 7
3413 The permutation is done in log LENGTH stages. In each stage interleave_high
3414 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3415 where the first argument is taken from the first half of DR_CHAIN and the
3416 second argument from its second half.
3417 In our example,
3419 I1: interleave_high (1st vec, 3rd vec)
3420 I2: interleave_low (1st vec, 3rd vec)
3421 I3: interleave_high (2nd vec, 4th vec)
3422 I4: interleave_low (2nd vec, 4th vec)
3424 The output for the first stage is:
3426 I1: 0 16 1 17 2 18 3 19
3427 I2: 4 20 5 21 6 22 7 23
3428 I3: 8 24 9 25 10 26 11 27
3429 I4: 12 28 13 29 14 30 15 31
3431 The output of the second stage, i.e. the final result is:
3433 I1: 0 8 16 24 1 9 17 25
3434 I2: 2 10 18 26 3 11 19 27
3435 I3: 4 12 20 28 5 13 21 29
3436 I4: 6 14 22 30 7 15 23 31. */
3438 bool
3439 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3440 unsigned int length,
3441 gimple stmt,
3442 gimple_stmt_iterator *gsi,
3443 VEC(tree,heap) **result_chain)
3445 tree perm_dest, vect1, vect2, high, low;
3446 gimple perm_stmt;
3447 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3448 int i;
3449 unsigned int j;
3450 enum tree_code high_code, low_code;
3452 /* Check that the operation is supported. */
3453 if (!vect_strided_store_supported (vectype))
3454 return false;
3456 *result_chain = VEC_copy (tree, heap, dr_chain);
3458 for (i = 0; i < exact_log2 (length); i++)
3460 for (j = 0; j < length/2; j++)
3462 vect1 = VEC_index (tree, dr_chain, j);
3463 vect2 = VEC_index (tree, dr_chain, j+length/2);
3465 /* Create interleaving stmt:
3466 in the case of big endian:
3467 high = interleave_high (vect1, vect2)
3468 and in the case of little endian:
3469 high = interleave_low (vect1, vect2). */
3470 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3471 DECL_GIMPLE_REG_P (perm_dest) = 1;
3472 add_referenced_var (perm_dest);
3473 if (BYTES_BIG_ENDIAN)
3475 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3476 low_code = VEC_INTERLEAVE_LOW_EXPR;
3478 else
3480 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3481 high_code = VEC_INTERLEAVE_LOW_EXPR;
3483 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3484 vect1, vect2);
3485 high = make_ssa_name (perm_dest, perm_stmt);
3486 gimple_assign_set_lhs (perm_stmt, high);
3487 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3488 VEC_replace (tree, *result_chain, 2*j, high);
3490 /* Create interleaving stmt:
3491 in the case of big endian:
3492 low = interleave_low (vect1, vect2)
3493 and in the case of little endian:
3494 low = interleave_high (vect1, vect2). */
3495 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3496 DECL_GIMPLE_REG_P (perm_dest) = 1;
3497 add_referenced_var (perm_dest);
3498 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3499 vect1, vect2);
3500 low = make_ssa_name (perm_dest, perm_stmt);
3501 gimple_assign_set_lhs (perm_stmt, low);
3502 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3503 VEC_replace (tree, *result_chain, 2*j+1, low);
3505 dr_chain = VEC_copy (tree, heap, *result_chain);
3507 return true;
3510 /* Function vect_setup_realignment
3512 This function is called when vectorizing an unaligned load using
3513 the dr_explicit_realign[_optimized] scheme.
3514 This function generates the following code at the loop prolog:
3516 p = initial_addr;
3517 x msq_init = *(floor(p)); # prolog load
3518 realignment_token = call target_builtin;
3519 loop:
3520 x msq = phi (msq_init, ---)
3522 The stmts marked with x are generated only for the case of
3523 dr_explicit_realign_optimized.
3525 The code above sets up a new (vector) pointer, pointing to the first
3526 location accessed by STMT, and a "floor-aligned" load using that pointer.
3527 It also generates code to compute the "realignment-token" (if the relevant
3528 target hook was defined), and creates a phi-node at the loop-header bb
3529 whose arguments are the result of the prolog-load (created by this
3530 function) and the result of a load that takes place in the loop (to be
3531 created by the caller to this function).
3533 For the case of dr_explicit_realign_optimized:
3534 The caller to this function uses the phi-result (msq) to create the
3535 realignment code inside the loop, and sets up the missing phi argument,
3536 as follows:
3537 loop:
3538 msq = phi (msq_init, lsq)
3539 lsq = *(floor(p')); # load in loop
3540 result = realign_load (msq, lsq, realignment_token);
3542 For the case of dr_explicit_realign:
3543 loop:
3544 msq = *(floor(p)); # load in loop
3545 p' = p + (VS-1);
3546 lsq = *(floor(p')); # load in loop
3547 result = realign_load (msq, lsq, realignment_token);
3549 Input:
3550 STMT - (scalar) load stmt to be vectorized. This load accesses
3551 a memory location that may be unaligned.
3552 BSI - place where new code is to be inserted.
3553 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3554 is used.
3556 Output:
3557 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3558 target hook, if defined.
3559 Return value - the result of the loop-header phi node. */
3561 tree
3562 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3563 tree *realignment_token,
3564 enum dr_alignment_support alignment_support_scheme,
3565 tree init_addr,
3566 struct loop **at_loop)
3568 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3569 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3570 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3571 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3572 struct loop *loop = NULL;
3573 edge pe = NULL;
3574 tree scalar_dest = gimple_assign_lhs (stmt);
3575 tree vec_dest;
3576 gimple inc;
3577 tree ptr;
3578 tree data_ref;
3579 gimple new_stmt;
3580 basic_block new_bb;
3581 tree msq_init = NULL_TREE;
3582 tree new_temp;
3583 gimple phi_stmt;
3584 tree msq = NULL_TREE;
3585 gimple_seq stmts = NULL;
3586 bool inv_p;
3587 bool compute_in_loop = false;
3588 bool nested_in_vect_loop = false;
3589 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3590 struct loop *loop_for_initial_load = NULL;
3592 if (loop_vinfo)
3594 loop = LOOP_VINFO_LOOP (loop_vinfo);
3595 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3598 gcc_assert (alignment_support_scheme == dr_explicit_realign
3599 || alignment_support_scheme == dr_explicit_realign_optimized);
3601 /* We need to generate three things:
3602 1. the misalignment computation
3603 2. the extra vector load (for the optimized realignment scheme).
3604 3. the phi node for the two vectors from which the realignment is
3605 done (for the optimized realignment scheme). */
3607 /* 1. Determine where to generate the misalignment computation.
3609 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3610 calculation will be generated by this function, outside the loop (in the
3611 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3612 caller, inside the loop.
3614 Background: If the misalignment remains fixed throughout the iterations of
3615 the loop, then both realignment schemes are applicable, and also the
3616 misalignment computation can be done outside LOOP. This is because we are
3617 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3618 are a multiple of VS (the Vector Size), and therefore the misalignment in
3619 different vectorized LOOP iterations is always the same.
3620 The problem arises only if the memory access is in an inner-loop nested
3621 inside LOOP, which is now being vectorized using outer-loop vectorization.
3622 This is the only case when the misalignment of the memory access may not
3623 remain fixed throughout the iterations of the inner-loop (as explained in
3624 detail in vect_supportable_dr_alignment). In this case, not only is the
3625 optimized realignment scheme not applicable, but also the misalignment
3626 computation (and generation of the realignment token that is passed to
3627 REALIGN_LOAD) has to be done inside the loop.
3629 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3630 or not, which in turn determines if the misalignment is computed inside
3631 the inner-loop, or outside LOOP. */
3633 if (init_addr != NULL_TREE || !loop_vinfo)
3635 compute_in_loop = true;
3636 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3640 /* 2. Determine where to generate the extra vector load.
3642 For the optimized realignment scheme, instead of generating two vector
3643 loads in each iteration, we generate a single extra vector load in the
3644 preheader of the loop, and in each iteration reuse the result of the
3645 vector load from the previous iteration. In case the memory access is in
3646 an inner-loop nested inside LOOP, which is now being vectorized using
3647 outer-loop vectorization, we need to determine whether this initial vector
3648 load should be generated at the preheader of the inner-loop, or can be
3649 generated at the preheader of LOOP. If the memory access has no evolution
3650 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3651 to be generated inside LOOP (in the preheader of the inner-loop). */
3653 if (nested_in_vect_loop)
3655 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3656 bool invariant_in_outerloop =
3657 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3658 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3660 else
3661 loop_for_initial_load = loop;
3662 if (at_loop)
3663 *at_loop = loop_for_initial_load;
3665 if (loop_for_initial_load)
3666 pe = loop_preheader_edge (loop_for_initial_load);
3668 /* 3. For the case of the optimized realignment, create the first vector
3669 load at the loop preheader. */
3671 if (alignment_support_scheme == dr_explicit_realign_optimized)
3673 /* Create msq_init = *(floor(p1)) in the loop preheader */
3675 gcc_assert (!compute_in_loop);
3676 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3677 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3678 &init_addr, NULL, &inc, true, &inv_p);
3679 new_stmt = gimple_build_assign_with_ops
3680 (BIT_AND_EXPR, NULL_TREE, ptr,
3681 build_int_cst (TREE_TYPE (ptr),
3682 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3683 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3684 gimple_assign_set_lhs (new_stmt, new_temp);
3685 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3686 gcc_assert (!new_bb);
3687 data_ref
3688 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3689 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3690 new_stmt = gimple_build_assign (vec_dest, data_ref);
3691 new_temp = make_ssa_name (vec_dest, new_stmt);
3692 gimple_assign_set_lhs (new_stmt, new_temp);
3693 mark_symbols_for_renaming (new_stmt);
3694 if (pe)
3696 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3697 gcc_assert (!new_bb);
3699 else
3700 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3702 msq_init = gimple_assign_lhs (new_stmt);
3705 /* 4. Create realignment token using a target builtin, if available.
3706 It is done either inside the containing loop, or before LOOP (as
3707 determined above). */
3709 if (targetm.vectorize.builtin_mask_for_load)
3711 tree builtin_decl;
3713 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3714 if (!init_addr)
3716 /* Generate the INIT_ADDR computation outside LOOP. */
3717 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3718 NULL_TREE, loop);
3719 if (loop)
3721 pe = loop_preheader_edge (loop);
3722 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3723 gcc_assert (!new_bb);
3725 else
3726 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3729 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3730 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3731 vec_dest =
3732 vect_create_destination_var (scalar_dest,
3733 gimple_call_return_type (new_stmt));
3734 new_temp = make_ssa_name (vec_dest, new_stmt);
3735 gimple_call_set_lhs (new_stmt, new_temp);
3737 if (compute_in_loop)
3738 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3739 else
3741 /* Generate the misalignment computation outside LOOP. */
3742 pe = loop_preheader_edge (loop);
3743 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3744 gcc_assert (!new_bb);
3747 *realignment_token = gimple_call_lhs (new_stmt);
3749 /* The result of the CALL_EXPR to this builtin is determined from
3750 the value of the parameter and no global variables are touched,
3751 which makes the builtin a "const" function. Requiring the
3752 builtin to have the "const" attribute makes it unnecessary
3753 to call mark_call_clobbered. */
3754 gcc_assert (TREE_READONLY (builtin_decl));
3757 if (alignment_support_scheme == dr_explicit_realign)
3758 return msq;
3760 gcc_assert (!compute_in_loop);
3761 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3764 /* 5. Create msq = phi <msq_init, lsq> in loop */
3766 pe = loop_preheader_edge (containing_loop);
3767 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3768 msq = make_ssa_name (vec_dest, NULL);
3769 phi_stmt = create_phi_node (msq, containing_loop->header);
3770 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3771 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3773 return msq;
3777 /* Function vect_strided_load_supported.
3779 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3780 and FALSE otherwise. */
3782 bool
3783 vect_strided_load_supported (tree vectype)
3785 optab perm_even_optab, perm_odd_optab;
3786 enum machine_mode mode;
3788 mode = TYPE_MODE (vectype);
3790 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3791 optab_default);
3792 if (!perm_even_optab)
3794 if (vect_print_dump_info (REPORT_DETAILS))
3795 fprintf (vect_dump, "no optab for perm_even.");
3796 return false;
3799 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3801 if (vect_print_dump_info (REPORT_DETAILS))
3802 fprintf (vect_dump, "perm_even op not supported by target.");
3803 return false;
3806 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3807 optab_default);
3808 if (!perm_odd_optab)
3810 if (vect_print_dump_info (REPORT_DETAILS))
3811 fprintf (vect_dump, "no optab for perm_odd.");
3812 return false;
3815 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3817 if (vect_print_dump_info (REPORT_DETAILS))
3818 fprintf (vect_dump, "perm_odd op not supported by target.");
3819 return false;
3821 return true;
3825 /* Function vect_permute_load_chain.
3827 Given a chain of interleaved loads in DR_CHAIN, whose LENGTH must be
3828 a power of 2, generate extract_even/odd stmts to reorder the input data
3829 correctly. Return the final references for loads in RESULT_CHAIN.
3831 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3832 The input is 4 vectors each containing 8 elements. We assign a number to each
3833 element; the input sequence is:
3835 1st vec: 0 1 2 3 4 5 6 7
3836 2nd vec: 8 9 10 11 12 13 14 15
3837 3rd vec: 16 17 18 19 20 21 22 23
3838 4th vec: 24 25 26 27 28 29 30 31
3840 The output sequence should be:
3842 1st vec: 0 4 8 12 16 20 24 28
3843 2nd vec: 1 5 9 13 17 21 25 29
3844 3rd vec: 2 6 10 14 18 22 26 30
3845 4th vec: 3 7 11 15 19 23 27 31
3847 i.e., the first output vector should contain the first elements of each
3848 interleaving group, etc.
3850 We use extract_even/odd instructions to create such output. The input of
3851 each extract_even/odd operation is two vectors
3852 1st vec 2nd vec
3853 0 1 2 3 4 5 6 7
3855 and the output is the vector of extracted even/odd elements. The output of
3856 extract_even will be: 0 2 4 6
3857 and of extract_odd: 1 3 5 7
3860 The permutation is done in log2 (LENGTH) stages. In each stage extract_even
3861 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3862 their order. In our example,
3864 E1: extract_even (1st vec, 2nd vec)
3865 E2: extract_odd (1st vec, 2nd vec)
3866 E3: extract_even (3rd vec, 4th vec)
3867 E4: extract_odd (3rd vec, 4th vec)
3869 The output for the first stage will be:
3871 E1: 0 2 4 6 8 10 12 14
3872 E2: 1 3 5 7 9 11 13 15
3873 E3: 16 18 20 22 24 26 28 30
3874 E4: 17 19 21 23 25 27 29 31
3876 In order to proceed and create the correct sequence for the next stage (or
3877 for the correct output, if the second stage is the last one, as in our
3878 example), we first put the output of extract_even operation and then the
3879 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3880 The input for the second stage is:
3882 1st vec (E1): 0 2 4 6 8 10 12 14
3883 2nd vec (E3): 16 18 20 22 24 26 28 30
3884 3rd vec (E2): 1 3 5 7 9 11 13 15
3885 4th vec (E4): 17 19 21 23 25 27 29 31
3887 The output of the second stage:
3889 E1: 0 4 8 12 16 20 24 28
3890 E2: 2 6 10 14 18 22 26 30
3891 E3: 1 5 9 13 17 21 25 29
3892 E4: 3 7 11 15 19 23 27 31
3894 And RESULT_CHAIN after reordering:
3896 1st vec (E1): 0 4 8 12 16 20 24 28
3897 2nd vec (E3): 1 5 9 13 17 21 25 29
3898 3rd vec (E2): 2 6 10 14 18 22 26 30
3899 4th vec (E4): 3 7 11 15 19 23 27 31. */
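/* Editorial sketch (illustrative only, not part of the original sources):
   the same reordering expressed over plain arrays IN[] and OUT[] of LENGTH
   vector values, using hypothetical extract_even/extract_odd helpers:

     for (i = 0; i < exact_log2 (length); i++)
       {
         for (j = 0; j < length; j += 2)
           {
             out[j / 2] = extract_even (in[j], in[j + 1]);
             out[j / 2 + length / 2] = extract_odd (in[j], in[j + 1]);
           }
         swap (in, out);
       }

   After each stage OUT feeds the next stage, just as RESULT_CHAIN is copied
   back into DR_CHAIN at the bottom of the loop below.  */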
3901 bool
3902 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3903 unsigned int length,
3904 gimple stmt,
3905 gimple_stmt_iterator *gsi,
3906 VEC(tree,heap) **result_chain)
3908 tree perm_dest, data_ref, first_vect, second_vect;
3909 gimple perm_stmt;
3910 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3911 int i;
3912 unsigned int j;
3914 /* Check that the operation is supported. */
3915 if (!vect_strided_load_supported (vectype))
3916 return false;
3918 *result_chain = VEC_copy (tree, heap, dr_chain);
3919 for (i = 0; i < exact_log2 (length); i++)
3921 for (j = 0; j < length; j +=2)
3923 first_vect = VEC_index (tree, dr_chain, j);
3924 second_vect = VEC_index (tree, dr_chain, j+1);
3926 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3927 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3928 DECL_GIMPLE_REG_P (perm_dest) = 1;
3929 add_referenced_var (perm_dest);
3931 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3932 perm_dest, first_vect,
3933 second_vect);
3935 data_ref = make_ssa_name (perm_dest, perm_stmt);
3936 gimple_assign_set_lhs (perm_stmt, data_ref);
3937 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3938 mark_symbols_for_renaming (perm_stmt);
3940 VEC_replace (tree, *result_chain, j/2, data_ref);
3942 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3943 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3944 DECL_GIMPLE_REG_P (perm_dest) = 1;
3945 add_referenced_var (perm_dest);
3947 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3948 perm_dest, first_vect,
3949 second_vect);
3950 data_ref = make_ssa_name (perm_dest, perm_stmt);
3951 gimple_assign_set_lhs (perm_stmt, data_ref);
3952 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3953 mark_symbols_for_renaming (perm_stmt);
3955 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3957 dr_chain = VEC_copy (tree, heap, *result_chain);
3959 return true;
3963 /* Function vect_transform_strided_load.
3965 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3966 to perform their permutation and ascribe the resulting vectorized statements to
3967 the scalar statements.
3970 bool
3971 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3972 gimple_stmt_iterator *gsi)
3974 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3975 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3976 gimple next_stmt, new_stmt;
3977 VEC(tree,heap) *result_chain = NULL;
3978 unsigned int i, gap_count;
3979 tree tmp_data_ref;
3981 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3982 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3983 vectors, that are ready for vector computation. */
3984 result_chain = VEC_alloc (tree, heap, size);
3985 /* Permute. */
3986 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3987 return false;
3989 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3990 Since we scan the chain starting from its first node, their order
3991 corresponds to the order of data-refs in RESULT_CHAIN. */
3992 next_stmt = first_stmt;
3993 gap_count = 1;
3994 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
3996 if (!next_stmt)
3997 break;
3999 /* Skip the gaps. Loads created for the gaps will be removed by the dead
4000 code elimination pass later. No need to check for the first stmt in
4001 the group, since it always exists.
4002 DR_GROUP_GAP is the number of steps in elements from the previous
4003 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4004 correspond to the gaps. */
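/* Editorial example (not part of the original sources): for a group that
   loads a[4i] and a[4i+2], DR_GROUP_GAP of the second stmt is 2, so the
   permuted vector for offset 1 is skipped here and later removed as dead
   code.  */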
4005 if (next_stmt != first_stmt
4006 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4008 gap_count++;
4009 continue;
4012 while (next_stmt)
4014 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4015 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4016 copies, and we put the new vector statement in the first available
4017 RELATED_STMT. */
4018 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4019 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4020 else
4022 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4024 gimple prev_stmt =
4025 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4026 gimple rel_stmt =
4027 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4028 while (rel_stmt)
4030 prev_stmt = rel_stmt;
4031 rel_stmt =
4032 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4035 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4036 new_stmt;
4040 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4041 gap_count = 1;
4042 /* If NEXT_STMT accesses the same DR as the previous statement,
4043 put the same TMP_DATA_REF as its vectorized statement; otherwise
4044 get the next data-ref from RESULT_CHAIN. */
4045 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4046 break;
4050 VEC_free (tree, heap, result_chain);
4051 return true;
4054 /* Function vect_can_force_dr_alignment_p.
4056 Returns whether a DECL can be forced to be aligned on an ALIGNMENT-bit
4057 boundary. */
4059 bool
4060 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4062 if (TREE_CODE (decl) != VAR_DECL)
4063 return false;
4065 if (DECL_EXTERNAL (decl))
4066 return false;
4068 if (TREE_ASM_WRITTEN (decl))
4069 return false;
4071 if (TREE_STATIC (decl))
4072 return (alignment <= MAX_OFILE_ALIGNMENT);
4073 else
4074 return (alignment <= MAX_STACK_ALIGNMENT);
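/* Editorial note (not part of the original sources): a caller is expected to
   force the alignment only when this predicate allows it, roughly along the
   lines of:

     if (vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
       {
         DECL_ALIGN (base) = TYPE_ALIGN (vectype);
         DECL_USER_ALIGN (base) = 1;
       }

   This is a sketch of the intended usage, not a quotation of any caller.  */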
4078 /* Return whether the data reference DR is supported with respect to its
4079 alignment.
4080 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even if
4081 it is aligned, i.e., check if it is possible to vectorize it with different
4082 alignment. */
4084 enum dr_alignment_support
4085 vect_supportable_dr_alignment (struct data_reference *dr,
4086 bool check_aligned_accesses)
4088 gimple stmt = DR_STMT (dr);
4089 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4090 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4091 enum machine_mode mode = TYPE_MODE (vectype);
4092 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4093 struct loop *vect_loop = NULL;
4094 bool nested_in_vect_loop = false;
4096 if (aligned_access_p (dr) && !check_aligned_accesses)
4097 return dr_aligned;
4099 if (loop_vinfo)
4101 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4102 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4105 /* Possibly unaligned access. */
4107 /* We can choose between using the implicit realignment scheme (generating
4108 a misaligned_move stmt) and the explicit realignment scheme (generating
4109 aligned loads with a REALIGN_LOAD). There are two variants to the
4110 explicit realignment scheme: optimized, and unoptimized.
4111 We can optimize the realignment only if the step between consecutive
4112 vector loads is equal to the vector size. Since the vector memory
4113 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4114 is guaranteed that the misalignment amount remains the same throughout the
4115 execution of the vectorized loop. Therefore, we can create the
4116 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4117 at the loop preheader.
4119 However, in the case of outer-loop vectorization, when vectorizing a
4120 memory access in the inner-loop nested within the LOOP that is now being
4121 vectorized, while it is guaranteed that the misalignment of the
4122 vectorized memory access will remain the same in different outer-loop
4123 iterations, it is *not* guaranteed that it will remain the same throughout
4124 the execution of the inner-loop. This is because the inner-loop advances
4125 with the original scalar step (and not in steps of VS). If the inner-loop
4126 step happens to be a multiple of VS, then the misalignment remains fixed
4127 and we can use the optimized realignment scheme. For example:
4129 for (i=0; i<N; i++)
4130 for (j=0; j<M; j++)
4131 s += a[i+j];
4133 When vectorizing the i-loop in the above example, the step between
4134 consecutive vector loads is 1, and so the misalignment does not remain
4135 fixed across the execution of the inner-loop, and the realignment cannot
4136 be optimized (as illustrated in the following pseudo vectorized loop):
4138 for (i=0; i<N; i+=4)
4139 for (j=0; j<M; j++){
4140 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4141 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4142 // (assuming that we start from an aligned address).
4145 We therefore have to use the unoptimized realignment scheme:
4147 for (i=0; i<N; i+=4)
4148 for (j=k; j<M; j+=4)
4149 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4150 // that the misalignment of the initial address is
4151 // 0).
4153 The loop can then be vectorized as follows:
4155 for (k=0; k<4; k++){
4156 rt = get_realignment_token (&vp[k]);
4157 for (i=0; i<N; i+=4){
4158 v1 = vp[i+k];
4159 for (j=k; j<M; j+=4){
4160 v2 = vp[i+j+VS-1];
4161 va = REALIGN_LOAD <v1,v2,rt>;
4162 vs += va;
4163 v1 = v2;
4166 } */
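/* Editorial summary (not part of the original sources) of the decision below:
   for loads, if the target provides a vec_realign_load pattern (and either no
   mask_for_load builtin or a usable one), the explicit realignment scheme is
   chosen -- the optimized variant when vectorizing a loop and the access is
   not an inner-loop access whose step differs from the vector size, the
   unoptimized variant otherwise.  Failing that, both loads and stores fall
   back to the target's support_vector_misalignment hook, and finally to
   dr_unaligned_unsupported.  */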
4168 if (DR_IS_READ (dr))
4170 bool is_packed = false;
4171 tree type = (TREE_TYPE (DR_REF (dr)));
4173 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4174 && (!targetm.vectorize.builtin_mask_for_load
4175 || targetm.vectorize.builtin_mask_for_load ()))
4177 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4178 if ((nested_in_vect_loop
4179 && (TREE_INT_CST_LOW (DR_STEP (dr))
4180 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4181 || !loop_vinfo)
4182 return dr_explicit_realign;
4183 else
4184 return dr_explicit_realign_optimized;
4186 if (!known_alignment_for_access_p (dr))
4188 tree ba = DR_BASE_OBJECT (dr);
4190 if (ba)
4191 is_packed = contains_packed_reference (ba);
4194 if (targetm.vectorize.
4195 support_vector_misalignment (mode, type,
4196 DR_MISALIGNMENT (dr), is_packed))
4197 /* Can't software pipeline the loads, but can at least do them. */
4198 return dr_unaligned_supported;
4200 else
4202 bool is_packed = false;
4203 tree type = (TREE_TYPE (DR_REF (dr)));
4205 if (!known_alignment_for_access_p (dr))
4207 tree ba = DR_BASE_OBJECT (dr);
4209 if (ba)
4210 is_packed = contains_packed_reference (ba);
4213 if (targetm.vectorize.
4214 support_vector_misalignment (mode, type,
4215 DR_MISALIGNMENT (dr), is_packed))
4216 return dr_unaligned_supported;
4219 /* Unsupported. */
4220 return dr_unaligned_unsupported;