gcc/tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
43 #include "expr.h"
44 #include "optabs.h"
46 /* Return the smallest scalar part of STMT.
47 This is used to determine the vectype of the stmt. We generally set the
48 vectype according to the type of the result (lhs). For stmts whose
49 result-type is different than the type of the arguments (e.g., demotion,
50 promotion), vectype will be reset appropriately (later). Note that we have
51 to visit the smallest datatype in this function, because that determines the
52 VF. If the smallest datatype in the loop is present only as the rhs of a
53 promotion operation - we'd miss it.
54 Such a case, where a variable of this datatype does not appear in the lhs
55 anywhere in the loop, can only occur if it's an invariant: e.g.:
56 'int_x = (int) short_inv', which we'd expect to have been optimized away by
57 invariant motion. However, we cannot rely on invariant motion to always
58 take invariants out of the loop, and so in the case of promotion we also
59 have to check the rhs.
60 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
61 types. */
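/* For example, for the promotion 'int_x = (int) short_y' (assuming 2-byte
   shorts and 4-byte ints; the variable names are purely illustrative), the
   lhs size is 4 and the rhs size is 2, so the short type is returned, with
   *LHS_SIZE_UNIT = 4 and *RHS_SIZE_UNIT = 2. */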
63 tree
64 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
65 HOST_WIDE_INT *rhs_size_unit)
67 tree scalar_type = gimple_expr_type (stmt);
68 HOST_WIDE_INT lhs, rhs;
70 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
72 if (is_gimple_assign (stmt)
73 && (gimple_assign_cast_p (stmt)
74 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
75 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
77 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
79 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
80 if (rhs < lhs)
81 scalar_type = rhs_type;
84 *lhs_size_unit = lhs;
85 *rhs_size_unit = rhs;
86 return scalar_type;
90 /* Find the place of the data-ref in STMT in the interleaving chain that starts
91 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
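/* For example, if the chain starting at FIRST_STMT consists of the accesses
   {a[i], a[i+1], a[i+2]} (a hypothetical group), then for the stmt accessing
   a[i+2] the returned place is 2; FIRST_STMT itself is at place 0. */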
93 int
94 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
96 gimple next_stmt = first_stmt;
97 int result = 0;
99 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
100 return -1;
102 while (next_stmt && next_stmt != stmt)
104 result++;
105 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
108 if (next_stmt)
109 return result;
110 else
111 return -1;
115 /* Function vect_insert_into_interleaving_chain.
117 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
119 static void
120 vect_insert_into_interleaving_chain (struct data_reference *dra,
121 struct data_reference *drb)
123 gimple prev, next;
124 tree next_init;
125 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
126 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
128 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
129 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
130 while (next)
132 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
133 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
135 /* Insert here. */
136 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
137 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
138 return;
140 prev = next;
141 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
144 /* We got to the end of the list. Insert here. */
145 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
146 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
150 /* Function vect_update_interleaving_chain.
152 For two data-refs DRA and DRB that are part of a chain of interleaved data
153 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
155 There are four possible cases:
156 1. New stmts - both DRA and DRB are not a part of any chain:
157 FIRST_DR = DRB
158 NEXT_DR (DRB) = DRA
159 2. DRB is a part of a chain and DRA is not:
160 no need to update FIRST_DR
161 no need to insert DRB
162 insert DRA according to init
163 3. DRA is a part of a chain and DRB is not:
164 if (init of FIRST_DR > init of DRB)
165 FIRST_DR = DRB
166 NEXT(FIRST_DR) = previous FIRST_DR
167 else
168 insert DRB according to its init
169 4. both DRA and DRB are in some interleaving chains:
170 choose the chain with the smallest init of FIRST_DR
171 insert the nodes of the second chain into the first one. */
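/* As an illustration of case 4: if one chain accesses offsets {0, 8} from a
   common base and the other accesses offsets {4, 12}, the chain whose
   FIRST_DR has the smaller init (offset 0) is kept, and the nodes of the
   other chain are inserted into it sorted by init, giving {0, 4, 8, 12}. */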
173 static void
174 vect_update_interleaving_chain (struct data_reference *drb,
175 struct data_reference *dra)
177 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
178 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
179 tree next_init, init_dra_chain, init_drb_chain;
180 gimple first_a, first_b;
181 tree node_init;
182 gimple node, prev, next, first_stmt;
184 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
185 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
187 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
188 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
189 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
190 return;
193 /* 2. DRB is a part of a chain and DRA is not. */
194 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
196 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
197 /* Insert DRA into the chain of DRB. */
198 vect_insert_into_interleaving_chain (dra, drb);
199 return;
202 /* 3. DRA is a part of a chain and DRB is not. */
203 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
205 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
206 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
207 old_first_stmt)));
208 gimple tmp;
210 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
212 /* DRB's init is smaller than the init of the stmt previously marked
213 as the first stmt of the interleaving chain of DRA. Therefore, we
214 update FIRST_STMT and put DRB in the head of the list. */
215 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
216 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
218 /* Update all the stmts in the list to point to the new FIRST_STMT. */
219 tmp = old_first_stmt;
220 while (tmp)
222 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
223 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
226 else
228 /* Insert DRB in the list of DRA. */
229 vect_insert_into_interleaving_chain (drb, dra);
230 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
232 return;
235 /* 4. both DRA and DRB are in some interleaving chains. */
236 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
237 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
238 if (first_a == first_b)
239 return;
240 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
241 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
243 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
245 /* Insert the nodes of DRA chain into the DRB chain.
246 After inserting a node, continue from this node of the DRB chain (don't
247 start from the beginning). */
248 node = DR_GROUP_FIRST_DR (stmtinfo_a);
249 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
250 first_stmt = first_b;
252 else
254 /* Insert the nodes of DRB chain into the DRA chain.
255 After inserting a node, continue from this node of the DRA chain (don't
256 start from the beginning). */
257 node = DR_GROUP_FIRST_DR (stmtinfo_b);
258 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
259 first_stmt = first_a;
262 while (node)
264 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
265 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
266 while (next)
268 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
269 if (tree_int_cst_compare (next_init, node_init) > 0)
271 /* Insert here. */
272 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
273 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
274 prev = node;
275 break;
277 prev = next;
278 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
280 if (!next)
282 /* We got to the end of the list. Insert here. */
283 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
284 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
285 prev = node;
287 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
288 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
292 /* Check dependence between DRA and DRB for basic block vectorization.
293 If the accesses share the same bases and offsets, we can compare their initial
294 constant offsets to decide whether they differ or not. In case of a read-
295 write dependence we check that the load is before the store to ensure that
296 vectorization will not change the order of the accesses. */
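/* For instance, for two 4-byte accesses off the same base and offset with
   DR_INIT 0 and 4, the locations differ and there is no dependence. With
   equal inits there is a read-write dependence, which is acceptable only if
   the load appears before the store. */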
298 static bool
299 vect_drs_dependent_in_basic_block (struct data_reference *dra,
300 struct data_reference *drb)
302 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
303 gimple earlier_stmt;
305 /* We only call this function for pairs of loads and stores, but we verify
306 it here. */
307 if (DR_IS_READ (dra) == DR_IS_READ (drb))
309 if (DR_IS_READ (dra))
310 return false;
311 else
312 return true;
315 /* Check that the data-refs have same bases and offsets. If not, we can't
316 determine if they are dependent. */
317 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
318 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
319 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
320 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
321 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
322 || !dr_equal_offsets_p (dra, drb))
323 return true;
325 /* Check the types. */
326 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
327 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
329 if (type_size_a != type_size_b
330 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
331 TREE_TYPE (DR_REF (drb))))
332 return true;
334 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
335 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
337 /* Two different locations - no dependence. */
338 if (init_a != init_b)
339 return false;
341 /* We have a read-write dependence. Check that the load is before the store.
342 When we vectorize basic blocks, a vector load can only be placed before its
343 corresponding scalar load, and a vector store can only be placed after its
344 corresponding scalar store. So the order of the accesses is preserved if
345 the load is before the store. */
346 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
347 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
348 return false;
350 return true;
354 /* Function vect_check_interleaving.
356 Check if DRA and DRB are part of an interleaving group. If they are, insert
357 DRA and DRB into an interleaving chain. */
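/* For example, for int accesses a[2*i] and a[2*i+1] (type size 4, a
   hypothetical loop), both data-refs have the same base and offset, equal
   steps of 8, and inits 0 and 4. The init difference 4 is a multiple of the
   type size and does not exceed the step, so the two refs are recorded as an
   interleaving chain. */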
359 static bool
360 vect_check_interleaving (struct data_reference *dra,
361 struct data_reference *drb)
363 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
365 /* Check that the data-refs have the same first location (except init) and
366 that they are both either stores or loads (not a load and a store). */
367 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
368 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
369 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
370 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
371 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
372 || !dr_equal_offsets_p (dra, drb)
373 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
374 || DR_IS_READ (dra) != DR_IS_READ (drb))
375 return false;
377 /* Check:
378 1. data-refs are of the same type
379 2. their steps are equal
380 3. the step (if greater than zero) is greater than the difference between
381 data-refs' inits. */
382 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
383 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
385 if (type_size_a != type_size_b
386 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
387 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
388 TREE_TYPE (DR_REF (drb))))
389 return false;
391 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
392 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
393 step = TREE_INT_CST_LOW (DR_STEP (dra));
395 if (init_a > init_b)
397 /* If init_a == init_b + the size of the type * k, we have an interleaving,
398 and DRB is accessed before DRA. */
399 diff_mod_size = (init_a - init_b) % type_size_a;
401 if (step && (init_a - init_b) > step)
402 return false;
404 if (diff_mod_size == 0)
406 vect_update_interleaving_chain (drb, dra);
407 if (vect_print_dump_info (REPORT_DR_DETAILS))
409 fprintf (vect_dump, "Detected interleaving ");
410 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
411 fprintf (vect_dump, " and ");
412 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
414 return true;
417 else
419 /* If init_b == init_a + the size of the type * k, we have an
420 interleaving, and DRA is accessed before DRB. */
421 diff_mod_size = (init_b - init_a) % type_size_a;
423 if (step && (init_b - init_a) > step)
424 return false;
426 if (diff_mod_size == 0)
428 vect_update_interleaving_chain (dra, drb);
429 if (vect_print_dump_info (REPORT_DR_DETAILS))
431 fprintf (vect_dump, "Detected interleaving ");
432 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
433 fprintf (vect_dump, " and ");
434 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
436 return true;
440 return false;
443 /* Check if the data references pointed to by DR_I and DR_J are the same or
444 belong to the same interleaving group. Return FALSE if the drs are
445 different, otherwise return TRUE. */
447 static bool
448 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
450 gimple stmt_i = DR_STMT (dr_i);
451 gimple stmt_j = DR_STMT (dr_j);
453 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
454 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
455 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
456 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
457 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
458 return true;
459 else
460 return false;
463 /* If address ranges represented by DDR_I and DDR_J are equal,
464 return TRUE, otherwise return FALSE. */
466 static bool
467 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
469 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
470 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
471 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
472 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
473 return true;
474 else
475 return false;
478 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
479 tested at run-time. Return TRUE if DDR was successfully inserted.
480 Return false if versioning is not supported. */
482 static bool
483 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
485 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
487 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
488 return false;
490 if (vect_print_dump_info (REPORT_DR_DETAILS))
492 fprintf (vect_dump, "mark for run-time aliasing test between ");
493 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
494 fprintf (vect_dump, " and ");
495 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
498 if (optimize_loop_nest_for_size_p (loop))
500 if (vect_print_dump_info (REPORT_DR_DETAILS))
501 fprintf (vect_dump, "versioning not supported when optimizing for size.");
502 return false;
505 /* FORNOW: We don't support versioning with outer-loop vectorization. */
506 if (loop->inner)
508 if (vect_print_dump_info (REPORT_DR_DETAILS))
509 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
510 return false;
513 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
514 return true;
518 /* Function vect_analyze_data_ref_dependence.
520 Return TRUE if there (might) exist a dependence between a memory-reference
521 DRA and a memory-reference DRB. If versioning for alias can check the
522 dependence at run time instead, return FALSE. Adjust *MAX_VF according to
523 the data dependence. */
525 static bool
526 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
527 loop_vec_info loop_vinfo, int *max_vf,
528 bool *data_dependence_in_bb)
530 unsigned int i;
531 struct loop *loop = NULL;
532 struct data_reference *dra = DDR_A (ddr);
533 struct data_reference *drb = DDR_B (ddr);
534 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
535 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
536 lambda_vector dist_v;
537 unsigned int loop_depth;
539 /* Don't bother to analyze statements marked as unvectorizable. */
540 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
541 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
542 return false;
544 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
546 /* Independent data accesses. */
547 vect_check_interleaving (dra, drb);
548 return false;
551 if (loop_vinfo)
552 loop = LOOP_VINFO_LOOP (loop_vinfo);
554 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
555 return false;
557 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
559 if (loop_vinfo)
561 if (vect_print_dump_info (REPORT_DR_DETAILS))
563 fprintf (vect_dump, "versioning for alias required: "
564 "can't determine dependence between ");
565 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
566 fprintf (vect_dump, " and ");
567 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
570 /* Add to list of ddrs that need to be tested at run-time. */
571 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
574 /* When vectorizing a basic block, an unknown dependence can still mean
575 a strided access. */
576 if (vect_check_interleaving (dra, drb))
577 return false;
579 if (vect_print_dump_info (REPORT_DR_DETAILS))
581 fprintf (vect_dump, "can't determine dependence between ");
582 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
583 fprintf (vect_dump, " and ");
584 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
587 /* We do not vectorize basic blocks with write-write dependencies. */
588 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
589 return true;
591 /* We deal with read-write dependencies in basic blocks later (by
592 verifying that all the loads in the basic block are before all the
593 stores). */
594 *data_dependence_in_bb = true;
595 return false;
598 /* Versioning for alias is not yet supported for basic block SLP, and
599 dependence distance is not applicable; hence, in case of a known data
600 dependence, basic block vectorization is impossible for now. */
601 if (!loop_vinfo)
603 if (dra != drb && vect_check_interleaving (dra, drb))
604 return false;
606 if (vect_print_dump_info (REPORT_DR_DETAILS))
608 fprintf (vect_dump, "determined dependence between ");
609 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
610 fprintf (vect_dump, " and ");
611 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
614 /* Do not vectorize basic blocks with write-write dependences. */
615 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
616 return true;
618 /* Check if this dependence is allowed in basic block vectorization. */
619 return vect_drs_dependent_in_basic_block (dra, drb);
622 /* Loop-based vectorization and known data dependence. */
623 if (DDR_NUM_DIST_VECTS (ddr) == 0)
625 if (vect_print_dump_info (REPORT_DR_DETAILS))
627 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
628 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
629 fprintf (vect_dump, " and ");
630 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
632 /* Add to list of ddrs that need to be tested at run-time. */
633 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
636 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
637 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
639 int dist = dist_v[loop_depth];
641 if (vect_print_dump_info (REPORT_DR_DETAILS))
642 fprintf (vect_dump, "dependence distance = %d.", dist);
644 if (dist == 0)
646 if (vect_print_dump_info (REPORT_DR_DETAILS))
648 fprintf (vect_dump, "dependence distance == 0 between ");
649 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
650 fprintf (vect_dump, " and ");
651 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
654 /* For interleaving, mark that there is a read-write dependency if
655 necessary. We have already checked that one of the data-refs is a store. */
656 if (DR_IS_READ (dra))
657 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
658 else
660 if (DR_IS_READ (drb))
661 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
664 continue;
667 if (dist > 0 && DDR_REVERSED_P (ddr))
669 /* If DDR_REVERSED_P is set, the order of the data-refs in DDR was
670 reversed (to make the distance vector positive), and the actual
671 distance is negative. */
672 if (vect_print_dump_info (REPORT_DR_DETAILS))
673 fprintf (vect_dump, "dependence distance negative.");
674 continue;
677 if (abs (dist) >= 2
678 && abs (dist) < *max_vf)
680 /* The dependence distance requires reduction of the maximal
681 vectorization factor. */
682 *max_vf = abs (dist);
683 if (vect_print_dump_info (REPORT_DR_DETAILS))
684 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
685 *max_vf);
688 if (abs (dist) >= *max_vf)
690 /* Dependence distance does not create dependence, as far as
691 vectorization is concerned, in this case. */
692 if (vect_print_dump_info (REPORT_DR_DETAILS))
693 fprintf (vect_dump, "dependence distance >= VF.");
694 continue;
697 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
699 fprintf (vect_dump, "not vectorized, possible dependence "
700 "between data-refs ");
701 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
702 fprintf (vect_dump, " and ");
703 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
706 return true;
709 return false;
712 /* Function vect_analyze_data_ref_dependences.
714 Examine all the data references in the loop, and make sure there do not
715 exist any data dependences between them. Set *MAX_VF according to
716 the maximum vectorization factor the data dependences allow. */
718 bool
719 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
720 bb_vec_info bb_vinfo, int *max_vf,
721 bool *data_dependence_in_bb)
723 unsigned int i;
724 VEC (ddr_p, heap) *ddrs = NULL;
725 struct data_dependence_relation *ddr;
727 if (vect_print_dump_info (REPORT_DETAILS))
728 fprintf (vect_dump, "=== vect_analyze_dependences ===");
730 if (loop_vinfo)
731 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
732 else
733 ddrs = BB_VINFO_DDRS (bb_vinfo);
735 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
736 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
737 data_dependence_in_bb))
738 return false;
740 return true;
744 /* Function vect_compute_data_ref_alignment
746 Compute the misalignment of the data reference DR.
748 Output:
749 1. If during the misalignment computation it is found that the data reference
750 cannot be vectorized then false is returned.
751 2. DR_MISALIGNMENT (DR) is defined.
753 FOR NOW: No analysis is actually performed. Misalignment is calculated
754 only for trivial cases. TODO. */
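/* As an illustration, for a V4SI vectype (16 bytes) and a data-ref whose
   base is known to be 16-byte aligned with DR_INIT 4, the computed
   DR_MISALIGNMENT is 4 mod 16 = 4 bytes. If the base alignment is unknown
   and cannot be forced, DR_MISALIGNMENT stays -1 (unknown). */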
756 static bool
757 vect_compute_data_ref_alignment (struct data_reference *dr)
759 gimple stmt = DR_STMT (dr);
760 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
761 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
762 struct loop *loop = NULL;
763 tree ref = DR_REF (dr);
764 tree vectype;
765 tree base, base_addr;
766 bool base_aligned;
767 tree misalign;
768 tree aligned_to, alignment;
770 if (vect_print_dump_info (REPORT_DETAILS))
771 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
773 if (loop_vinfo)
774 loop = LOOP_VINFO_LOOP (loop_vinfo);
776 /* Initialize misalignment to unknown. */
777 SET_DR_MISALIGNMENT (dr, -1);
779 misalign = DR_INIT (dr);
780 aligned_to = DR_ALIGNED_TO (dr);
781 base_addr = DR_BASE_ADDRESS (dr);
782 vectype = STMT_VINFO_VECTYPE (stmt_info);
784 /* In case the dataref is in an inner-loop of the loop that is being
785 vectorized (LOOP), we use the base and misalignment information
786 relative to the outer-loop (LOOP). This is ok only if the misalignment
787 stays the same throughout the execution of the inner-loop, which is why
788 we have to check that the stride of the dataref in the inner-loop is
789 evenly divisible by the vector size. */
790 if (loop && nested_in_vect_loop_p (loop, stmt))
792 tree step = DR_STEP (dr);
793 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
795 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
797 if (vect_print_dump_info (REPORT_ALIGNMENT))
798 fprintf (vect_dump, "inner step divides the vector-size.");
799 misalign = STMT_VINFO_DR_INIT (stmt_info);
800 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
801 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
803 else
805 if (vect_print_dump_info (REPORT_ALIGNMENT))
806 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
807 misalign = NULL_TREE;
811 base = build_fold_indirect_ref (base_addr);
812 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
814 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
815 || !misalign)
817 if (vect_print_dump_info (REPORT_ALIGNMENT))
819 fprintf (vect_dump, "Unknown alignment for access: ");
820 print_generic_expr (vect_dump, base, TDF_SLIM);
822 return true;
825 if ((DECL_P (base)
826 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
827 alignment) >= 0)
828 || (TREE_CODE (base_addr) == SSA_NAME
829 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
830 TREE_TYPE (base_addr)))),
831 alignment) >= 0))
832 base_aligned = true;
833 else
834 base_aligned = false;
836 if (!base_aligned)
838 /* Do not change the alignment of global variables if
839 flag_section_anchors is enabled. */
840 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
841 || (TREE_STATIC (base) && flag_section_anchors))
843 if (vect_print_dump_info (REPORT_DETAILS))
845 fprintf (vect_dump, "can't force alignment of ref: ");
846 print_generic_expr (vect_dump, ref, TDF_SLIM);
848 return true;
851 /* Force the alignment of the decl.
852 NOTE: This is the only change to the code we make during
853 the analysis phase, before deciding to vectorize the loop. */
854 if (vect_print_dump_info (REPORT_DETAILS))
856 fprintf (vect_dump, "force alignment of ");
857 print_generic_expr (vect_dump, ref, TDF_SLIM);
860 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
861 DECL_USER_ALIGN (base) = 1;
864 /* At this point we assume that the base is aligned. */
865 gcc_assert (base_aligned
866 || (TREE_CODE (base) == VAR_DECL
867 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
869 /* If this is a backward running DR then first access in the larger
870 vectype actually is N-1 elements before the address in the DR.
871 Adjust misalign accordingly. */
872 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
874 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
875 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
876 otherwise we wouldn't be here. */
877 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
878 /* PLUS because DR_STEP was negative. */
879 misalign = size_binop (PLUS_EXPR, misalign, offset);
882 /* Modulo alignment. */
883 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
885 if (!host_integerp (misalign, 1))
887 /* Negative or overflowed misalignment value. */
888 if (vect_print_dump_info (REPORT_DETAILS))
889 fprintf (vect_dump, "unexpected misalign value");
890 return false;
893 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
895 if (vect_print_dump_info (REPORT_DETAILS))
897 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
898 print_generic_expr (vect_dump, ref, TDF_SLIM);
901 return true;
905 /* Function vect_compute_data_refs_alignment
907 Compute the misalignment of data references in the loop.
908 Return FALSE if a data reference is found that cannot be vectorized. */
910 static bool
911 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
912 bb_vec_info bb_vinfo)
914 VEC (data_reference_p, heap) *datarefs;
915 struct data_reference *dr;
916 unsigned int i;
918 if (loop_vinfo)
919 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
920 else
921 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
923 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
924 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
925 && !vect_compute_data_ref_alignment (dr))
927 if (bb_vinfo)
929 /* Mark unsupported statement as unvectorizable. */
930 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
931 continue;
933 else
934 return false;
937 return true;
941 /* Function vect_update_misalignment_for_peel
943 DR - the data reference whose misalignment is to be adjusted.
944 DR_PEEL - the data reference whose misalignment is being made
945 zero in the vector loop by the peel.
946 NPEEL - the number of iterations in the peel loop if the misalignment
947 of DR_PEEL is known at compile time. */
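/* For example, with a 16-byte vectype: if DR_PEEL has element size 4 and
   misalignment 8, then NPEEL = 2 aligns it, and a DR with element size 4
   and misalignment 4 is updated to (4 + 2*4) & 15 = 12. */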
949 static void
950 vect_update_misalignment_for_peel (struct data_reference *dr,
951 struct data_reference *dr_peel, int npeel)
953 unsigned int i;
954 VEC(dr_p,heap) *same_align_drs;
955 struct data_reference *current_dr;
956 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
957 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
958 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
959 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
961 /* For interleaved data accesses the step in the loop must be multiplied by
962 the size of the interleaving group. */
963 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
964 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
965 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
966 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
968 /* It can be assumed that the data refs with the same alignment as dr_peel
969 are aligned in the vector loop. */
970 same_align_drs
971 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
972 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
974 if (current_dr != dr)
975 continue;
976 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
977 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
978 SET_DR_MISALIGNMENT (dr, 0);
979 return;
982 if (known_alignment_for_access_p (dr)
983 && known_alignment_for_access_p (dr_peel))
985 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
986 int misal = DR_MISALIGNMENT (dr);
987 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
988 misal += negative ? -npeel * dr_size : npeel * dr_size;
989 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
990 SET_DR_MISALIGNMENT (dr, misal);
991 return;
994 if (vect_print_dump_info (REPORT_DETAILS))
995 fprintf (vect_dump, "Setting misalignment to -1.");
996 SET_DR_MISALIGNMENT (dr, -1);
1000 /* Function vect_verify_datarefs_alignment
1002 Return TRUE if all data references in the loop can be
1003 handled with respect to alignment. */
1005 bool
1006 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1008 VEC (data_reference_p, heap) *datarefs;
1009 struct data_reference *dr;
1010 enum dr_alignment_support supportable_dr_alignment;
1011 unsigned int i;
1013 if (loop_vinfo)
1014 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1015 else
1016 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1018 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1020 gimple stmt = DR_STMT (dr);
1021 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1023 /* For interleaving, only the alignment of the first access matters.
1024 Skip statements marked as not vectorizable. */
1025 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1026 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1027 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1028 continue;
1030 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1031 if (!supportable_dr_alignment)
1033 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1035 if (DR_IS_READ (dr))
1036 fprintf (vect_dump,
1037 "not vectorized: unsupported unaligned load.");
1038 else
1039 fprintf (vect_dump,
1040 "not vectorized: unsupported unaligned store.");
1042 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1044 return false;
1046 if (supportable_dr_alignment != dr_aligned
1047 && vect_print_dump_info (REPORT_ALIGNMENT))
1048 fprintf (vect_dump, "Vectorizing an unaligned access.");
1050 return true;
1054 /* Function vector_alignment_reachable_p
1056 Return true if vector alignment for DR is reachable by peeling
1057 a few loop iterations. Return false otherwise. */
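/* E.g., for a 16-byte V4SI vectype and a known misalignment of 8 bytes, the
   misalignment is 2 elements, so alignment is reachable by peeling; for an
   interleaved access this is accepted only if (4 - 2) is a multiple of the
   group size. */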
1059 static bool
1060 vector_alignment_reachable_p (struct data_reference *dr)
1062 gimple stmt = DR_STMT (dr);
1063 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1064 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1066 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1068 /* For interleaved access we peel only if the number of iterations in
1069 the prolog loop ({VF - misalignment}) is a multiple of the
1070 number of interleaved accesses. */
1071 int elem_size, mis_in_elements;
1072 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1074 /* FORNOW: handle only known alignment. */
1075 if (!known_alignment_for_access_p (dr))
1076 return false;
1078 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1079 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1081 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1082 return false;
1085 /* If the misalignment is known at compile time then allow peeling
1086 only if natural alignment is reachable through peeling. */
1087 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1089 HOST_WIDE_INT elmsize =
1090 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1091 if (vect_print_dump_info (REPORT_DETAILS))
1093 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1094 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1096 if (DR_MISALIGNMENT (dr) % elmsize)
1098 if (vect_print_dump_info (REPORT_DETAILS))
1099 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1100 return false;
1104 if (!known_alignment_for_access_p (dr))
1106 tree type = (TREE_TYPE (DR_REF (dr)));
1107 tree ba = DR_BASE_OBJECT (dr);
1108 bool is_packed = false;
1110 if (ba)
1111 is_packed = contains_packed_reference (ba);
1113 if (vect_print_dump_info (REPORT_DETAILS))
1114 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1115 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1116 return true;
1117 else
1118 return false;
1121 return true;
1125 /* Calculate the cost of the memory access represented by DR. */
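/* For example, with a vectorization factor of 8 and a V4SI access (4 units
   per vector), ncopies = 8 / 4 = 2 vector memory accesses are costed per
   iteration of the vectorized loop. */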
1127 static void
1128 vect_get_data_access_cost (struct data_reference *dr,
1129 unsigned int *inside_cost,
1130 unsigned int *outside_cost)
1132 gimple stmt = DR_STMT (dr);
1133 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1134 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1135 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1136 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1137 int ncopies = vf / nunits;
1138 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1140 if (!supportable_dr_alignment)
1141 *inside_cost = VECT_MAX_COST;
1142 else
1144 if (DR_IS_READ (dr))
1145 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1146 else
1147 vect_get_store_cost (dr, ncopies, inside_cost);
1150 if (vect_print_dump_info (REPORT_COST))
1151 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1152 "outside_cost = %d.", *inside_cost, *outside_cost);
1156 static hashval_t
1157 vect_peeling_hash (const void *elem)
1159 const struct _vect_peel_info *peel_info;
1161 peel_info = (const struct _vect_peel_info *) elem;
1162 return (hashval_t) peel_info->npeel;
1166 static int
1167 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1169 const struct _vect_peel_info *a, *b;
1171 a = (const struct _vect_peel_info *) elem1;
1172 b = (const struct _vect_peel_info *) elem2;
1173 return (a->npeel == b->npeel);
1177 /* Insert DR into peeling hash table with NPEEL as key. */
1179 static void
1180 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1181 int npeel)
1183 struct _vect_peel_info elem, *slot;
1184 void **new_slot;
1185 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1187 elem.npeel = npeel;
1188 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1189 &elem);
1190 if (slot)
1191 slot->count++;
1192 else
1194 slot = XNEW (struct _vect_peel_info);
1195 slot->npeel = npeel;
1196 slot->dr = dr;
1197 slot->count = 1;
1198 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1199 INSERT);
1200 *new_slot = slot;
1203 if (!supportable_dr_alignment && !flag_vect_cost_model)
1204 slot->count += VECT_MAX_COST;
1208 /* Traverse peeling hash table to find peeling option that aligns maximum
1209 number of data accesses. */
1211 static int
1212 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1214 vect_peel_info elem = (vect_peel_info) *slot;
1215 vect_peel_extended_info max = (vect_peel_extended_info) data;
1217 if (elem->count > max->peel_info.count)
1219 max->peel_info.npeel = elem->npeel;
1220 max->peel_info.count = elem->count;
1221 max->peel_info.dr = elem->dr;
1224 return 1;
1228 /* Traverse peeling hash table and calculate cost for each peeling option.
1229 Find the one with the lowest cost. */
1231 static int
1232 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1234 vect_peel_info elem = (vect_peel_info) *slot;
1235 vect_peel_extended_info min = (vect_peel_extended_info) data;
1236 int save_misalignment, dummy;
1237 unsigned int inside_cost = 0, outside_cost = 0, i;
1238 gimple stmt = DR_STMT (elem->dr);
1239 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1240 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1241 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1242 struct data_reference *dr;
1244 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1246 stmt = DR_STMT (dr);
1247 stmt_info = vinfo_for_stmt (stmt);
1248 /* For interleaving, only the alignment of the first access
1249 matters. */
1250 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1251 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1252 continue;
1254 save_misalignment = DR_MISALIGNMENT (dr);
1255 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1256 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1257 SET_DR_MISALIGNMENT (dr, save_misalignment);
1260 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1261 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1263 if (inside_cost < min->inside_cost
1264 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1266 min->inside_cost = inside_cost;
1267 min->outside_cost = outside_cost;
1268 min->peel_info.dr = elem->dr;
1269 min->peel_info.npeel = elem->npeel;
1272 return 1;
1276 /* Choose best peeling option by traversing peeling hash table and either
1277 choosing an option with the lowest cost (if cost model is enabled) or the
1278 option that aligns as many accesses as possible. */
1280 static struct data_reference *
1281 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1282 unsigned int *npeel)
1284 struct _vect_peel_extended_info res;
1286 res.peel_info.dr = NULL;
1288 if (flag_vect_cost_model)
1290 res.inside_cost = INT_MAX;
1291 res.outside_cost = INT_MAX;
1292 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1293 vect_peeling_hash_get_lowest_cost, &res);
1295 else
1297 res.peel_info.count = 0;
1298 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1299 vect_peeling_hash_get_most_frequent, &res);
1302 *npeel = res.peel_info.npeel;
1303 return res.peel_info.dr;
1307 /* Function vect_enhance_data_refs_alignment
1309 This pass will use loop versioning and loop peeling in order to enhance
1310 the alignment of data references in the loop.
1312 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1313 original loop is to be vectorized. Any other loops that are created by
1314 the transformations performed in this pass - are not supposed to be
1315 vectorized. This restriction will be relaxed.
1317 This pass will require a cost model to guide it whether to apply peeling
1318 or versioning or a combination of the two. For example, the scheme that
1319 Intel uses when given a loop with several memory accesses is as follows:
1320 choose one memory access ('p') whose alignment you want to force by doing
1321 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1322 other accesses are not necessarily aligned, or (2) use loop versioning to
1323 generate one loop in which all accesses are aligned, and another loop in
1324 which only 'p' is necessarily aligned.
1326 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1327 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1328 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1330 Devising a cost model is the most critical aspect of this work. It will
1331 guide us on which access to peel for, whether to use loop versioning, how
1332 many versions to create, etc. The cost model will probably consist of
1333 generic considerations as well as target specific considerations (on
1334 powerpc for example, misaligned stores are more painful than misaligned
1335 loads).
1337 Here are the general steps involved in alignment enhancements:
1339 -- original loop, before alignment analysis:
1340 for (i=0; i<N; i++){
1341 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1342 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1345 -- After vect_compute_data_refs_alignment:
1346 for (i=0; i<N; i++){
1347 x = q[i]; # DR_MISALIGNMENT(q) = 3
1348 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1351 -- Possibility 1: we do loop versioning:
1352 if (p is aligned) {
1353 for (i=0; i<N; i++){ # loop 1A
1354 x = q[i]; # DR_MISALIGNMENT(q) = 3
1355 p[i] = y; # DR_MISALIGNMENT(p) = 0
1358 else {
1359 for (i=0; i<N; i++){ # loop 1B
1360 x = q[i]; # DR_MISALIGNMENT(q) = 3
1361 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1365 -- Possibility 2: we do loop peeling:
1366 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1367 x = q[i];
1368 p[i] = y;
1370 for (i = 3; i < N; i++){ # loop 2A
1371 x = q[i]; # DR_MISALIGNMENT(q) = 0
1372 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1375 -- Possibility 3: combination of loop peeling and versioning:
1376 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1377 x = q[i];
1378 p[i] = y;
1380 if (p is aligned) {
1381 for (i = 3; i<N; i++){ # loop 3A
1382 x = q[i]; # DR_MISALIGNMENT(q) = 0
1383 p[i] = y; # DR_MISALIGNMENT(p) = 0
1386 else {
1387 for (i = 3; i<N; i++){ # loop 3B
1388 x = q[i]; # DR_MISALIGNMENT(q) = 0
1389 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1393 These loops are later passed to loop_transform to be vectorized. The
1394 vectorizer will use the alignment information to guide the transformation
1395 (whether to generate regular loads/stores, or with special handling for
1396 misalignment). */
1398 bool
1399 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1401 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1402 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1403 enum dr_alignment_support supportable_dr_alignment;
1404 struct data_reference *dr0 = NULL, *first_store = NULL;
1405 struct data_reference *dr;
1406 unsigned int i, j;
1407 bool do_peeling = false;
1408 bool do_versioning = false;
1409 bool stat;
1410 gimple stmt;
1411 stmt_vec_info stmt_info;
1412 int vect_versioning_for_alias_required;
1413 unsigned int npeel = 0;
1414 bool all_misalignments_unknown = true;
1415 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1416 unsigned possible_npeel_number = 1;
1417 tree vectype;
1418 unsigned int nelements, mis, same_align_drs_max = 0;
1420 if (vect_print_dump_info (REPORT_DETAILS))
1421 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1423 /* While cost model enhancements are expected in the future, the high level
1424 view of the code at this time is as follows:
1426 A) If there is a misaligned access then see if peeling to align
1427 this access can make all data references satisfy
1428 vect_supportable_dr_alignment. If so, update data structures
1429 as needed and return true.
1431 B) If peeling wasn't possible and there is a data reference with an
1432 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1433 then see if loop versioning checks can be used to make all data
1434 references satisfy vect_supportable_dr_alignment. If so, update
1435 data structures as needed and return true.
1437 C) If neither peeling nor versioning were successful then return false if
1438 any data reference does not satisfy vect_supportable_dr_alignment.
1440 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1442 Note, Possibility 3 above (which is peeling and versioning together) is not
1443 being done at this time. */
1445 /* (1) Peeling to force alignment. */
1447 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1448 Considerations:
1449 + How many accesses will become aligned due to the peeling
1450 - How many accesses will become unaligned due to the peeling,
1451 and the cost of misaligned accesses.
1452 - The cost of peeling (the extra runtime checks, the increase
1453 in code size). */
1455 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1457 stmt = DR_STMT (dr);
1458 stmt_info = vinfo_for_stmt (stmt);
1460 /* For interleaving, only the alignment of the first access
1461 matters. */
1462 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1463 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1464 continue;
1466 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1467 do_peeling = vector_alignment_reachable_p (dr);
1468 if (do_peeling)
1470 if (known_alignment_for_access_p (dr))
1472 unsigned int npeel_tmp;
1473 bool negative = tree_int_cst_compare (DR_STEP (dr),
1474 size_zero_node) < 0;
1476 /* Save info about DR in the hash table. */
1477 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1478 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1479 htab_create (1, vect_peeling_hash,
1480 vect_peeling_hash_eq, free);
1482 vectype = STMT_VINFO_VECTYPE (stmt_info);
1483 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1484 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1485 TREE_TYPE (DR_REF (dr))));
1486 npeel_tmp = (negative
1487 ? (mis - nelements) : (nelements - mis))
1488 & (nelements - 1);
1490 /* For multiple types, it is possible that the bigger type access
1491 will have more than one peeling option. E.g., a loop with two
1492 types: one of size (vector size / 4), and the other one of
1493 size (vector size / 8). The vectorization factor will be 8. If both
1494 accesses are misaligned by 3, the first one needs one scalar
1495 iteration to be aligned, and the second one needs 5. But the
1496 first one will also be aligned by peeling 5 scalar
1497 iterations, and in that case both accesses will be aligned.
1498 Hence, except for the immediate peeling amount, we also want
1499 to try to add a full vector size, while we don't exceed the
1500 vectorization factor.
1501 We do this automatically for the cost model, since we calculate the cost
1502 for every peeling option. */
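/* Continuing the example above, without the cost model: the first access
   (4 elements per vector) gets the peeling options {1, 5} and the second
   (8 elements per vector) gets {5}, so 5 is the most frequent option and
   aligns both accesses. */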
1503 if (!flag_vect_cost_model)
1504 possible_npeel_number = vf /nelements;
1506 /* Handle the aligned case. We may decide to align some other
1507 access, making DR unaligned. */
1508 if (DR_MISALIGNMENT (dr) == 0)
1510 npeel_tmp = 0;
1511 if (!flag_vect_cost_model)
1512 possible_npeel_number++;
1515 for (j = 0; j < possible_npeel_number; j++)
1517 gcc_assert (npeel_tmp <= vf);
1518 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1519 npeel_tmp += nelements;
1522 all_misalignments_unknown = false;
1523 /* The data-ref that was chosen for the case in which all the
1524 misalignments are unknown is not relevant anymore, since we
1525 have a data-ref with known alignment. */
1526 dr0 = NULL;
1528 else
1530 /* If we don't know all the misalignment values, we prefer
1531 peeling for the data-ref that has the maximum number of data-refs
1532 with the same alignment, unless the target prefers to align
1533 stores over loads. */
1534 if (all_misalignments_unknown)
1536 if (same_align_drs_max < VEC_length (dr_p,
1537 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1538 || !dr0)
1540 same_align_drs_max = VEC_length (dr_p,
1541 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1542 dr0 = dr;
1545 if (!first_store && DR_IS_WRITE (dr))
1546 first_store = dr;
1549 /* If there are both known and unknown misaligned accesses in the
1550 loop, we choose peeling amount according to the known
1551 accesses. */
1554 if (!supportable_dr_alignment)
1556 dr0 = dr;
1557 if (!first_store && DR_IS_WRITE (dr))
1558 first_store = dr;
1562 else
1564 if (!aligned_access_p (dr))
1566 if (vect_print_dump_info (REPORT_DETAILS))
1567 fprintf (vect_dump, "vector alignment may not be reachable");
1569 break;
1574 vect_versioning_for_alias_required
1575 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1577 /* Temporarily, if versioning for alias is required, we disable peeling
1578 until we support peeling and versioning. Often peeling for alignment
1579 will require peeling for loop-bound, which in turn requires that we
1580 know how to adjust the loop ivs after the loop. */
1581 if (vect_versioning_for_alias_required
1582 || !vect_can_advance_ivs_p (loop_vinfo)
1583 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1584 do_peeling = false;
1586 if (do_peeling && all_misalignments_unknown
1587 && vect_supportable_dr_alignment (dr0, false))
1590 /* Check if the target prefers to align stores over loads, i.e., if
1591 misaligned stores are more expensive than misaligned loads (taking
1592 drs with the same alignment into account). */
1593 if (first_store && DR_IS_READ (dr0))
1595 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1596 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1597 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1598 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1600 vect_get_data_access_cost (dr0, &load_inside_cost,
1601 &load_outside_cost);
1602 vect_get_data_access_cost (first_store, &store_inside_cost,
1603 &store_outside_cost);
1605 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1606 aligning the load DR0). */
1607 load_inside_penalty = store_inside_cost;
1608 load_outside_penalty = store_outside_cost;
1609 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1610 (vinfo_for_stmt (DR_STMT (first_store))),
1611 i, dr);
1612 i++)
1613 if (DR_IS_READ (dr))
1615 load_inside_penalty += load_inside_cost;
1616 load_outside_penalty += load_outside_cost;
1618 else
1620 load_inside_penalty += store_inside_cost;
1621 load_outside_penalty += store_outside_cost;
1624 /* Calculate the penalty for leaving DR0 unaligned (by
1625 aligning the FIRST_STORE). */
1626 store_inside_penalty = load_inside_cost;
1627 store_outside_penalty = load_outside_cost;
1628 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1629 (vinfo_for_stmt (DR_STMT (dr0))),
1630 i, dr);
1631 i++)
1632 if (DR_IS_READ (dr))
1634 store_inside_penalty += load_inside_cost;
1635 store_outside_penalty += load_outside_cost;
1637 else
1639 store_inside_penalty += store_inside_cost;
1640 store_outside_penalty += store_outside_cost;
1643 if (load_inside_penalty > store_inside_penalty
1644 || (load_inside_penalty == store_inside_penalty
1645 && load_outside_penalty > store_outside_penalty))
1646 dr0 = first_store;
1649 /* In case there are only loads with different unknown misalignments, use
1650 peeling only if it may help to align other accesses in the loop. */
1651 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1652 (vinfo_for_stmt (DR_STMT (dr0))))
1653 && vect_supportable_dr_alignment (dr0, false)
1654 != dr_unaligned_supported)
1655 do_peeling = false;
1658 if (do_peeling && !dr0)
1660 /* Peeling is possible, but there is no data access that is unsupported
1661 when unaligned. So we try to choose the best possible peeling. */
1663 /* We should get here only if there are drs with known misalignment. */
1664 gcc_assert (!all_misalignments_unknown);
1666 /* Choose the best peeling from the hash table. */
1667 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1668 if (!dr0 || !npeel)
1669 do_peeling = false;
1672 if (do_peeling)
1674 stmt = DR_STMT (dr0);
1675 stmt_info = vinfo_for_stmt (stmt);
1676 vectype = STMT_VINFO_VECTYPE (stmt_info);
1677 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1679 if (known_alignment_for_access_p (dr0))
1681 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1682 size_zero_node) < 0;
1683 if (!npeel)
1685 /* Since it's known at compile time, compute the number of
1686 iterations in the peeled loop (the peeling factor) for use in
1687 updating DR_MISALIGNMENT values. The peeling factor is the
1688 vectorization factor minus the misalignment as an element
1689 count. */
1690 mis = DR_MISALIGNMENT (dr0);
1691 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1692 npeel = ((negative ? mis - nelements : nelements - mis)
1693 & (nelements - 1));
1696 /* For interleaved data access every iteration accesses all the
1697 members of the group, therefore we divide the number of iterations
1698 by the group size. */
1699 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1700 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1701 npeel /= DR_GROUP_SIZE (stmt_info);
1703 if (vect_print_dump_info (REPORT_DETAILS))
1704 fprintf (vect_dump, "Try peeling by %d", npeel);
1707 /* Ensure that all data refs can be vectorized after the peel. */
1708 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1710 int save_misalignment;
1712 if (dr == dr0)
1713 continue;
1715 stmt = DR_STMT (dr);
1716 stmt_info = vinfo_for_stmt (stmt);
1717 /* For interleaving, only the alignment of the first access
1718 matters. */
1719 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1720 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1721 continue;
1723 save_misalignment = DR_MISALIGNMENT (dr);
1724 vect_update_misalignment_for_peel (dr, dr0, npeel);
1725 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1726 SET_DR_MISALIGNMENT (dr, save_misalignment);
1728 if (!supportable_dr_alignment)
1730 do_peeling = false;
1731 break;
1735 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1737 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1738 if (!stat)
1739 do_peeling = false;
1740 else
1741 return stat;
1744 if (do_peeling)
1746 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1747 If the misalignment of DR_i is identical to that of dr0 then set
1748 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1749 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1750 by the peeling factor times the element size of DR_i (MOD the
1751 vectorization factor times the size). Otherwise, the
1752 misalignment of DR_i must be set to unknown. */
1753 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1754 if (dr != dr0)
1755 vect_update_misalignment_for_peel (dr, dr0, npeel);
1757 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1758 if (npeel)
1759 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1760 else
1761 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1762 SET_DR_MISALIGNMENT (dr0, 0);
1763 if (vect_print_dump_info (REPORT_ALIGNMENT))
1764 fprintf (vect_dump, "Alignment of access forced using peeling.");
1766 if (vect_print_dump_info (REPORT_DETAILS))
1767 fprintf (vect_dump, "Peeling for alignment will be applied.");
1769 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1770 gcc_assert (stat);
1771 return stat;
1776 /* (2) Versioning to force alignment. */
1778 /* Try versioning if:
1779 1) flag_tree_vect_loop_version is TRUE
1780 2) optimize loop for speed
1781 3) there is at least one unsupported misaligned data ref with an unknown
1782 misalignment, and
1783 4) all misaligned data refs with a known misalignment are supported, and
1784 5) the number of runtime alignment checks is within reason. */
1786 do_versioning =
1787 flag_tree_vect_loop_version
1788 && optimize_loop_nest_for_speed_p (loop)
1789 && (!loop->inner); /* FORNOW */
1791 if (do_versioning)
1793 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1795 stmt = DR_STMT (dr);
1796 stmt_info = vinfo_for_stmt (stmt);
1798 /* For interleaving, only the alignment of the first access
1799 matters. */
1800 if (aligned_access_p (dr)
1801 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1802 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1803 continue;
1805 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1807 if (!supportable_dr_alignment)
1809 gimple stmt;
1810 int mask;
1811 tree vectype;
1813 if (known_alignment_for_access_p (dr)
1814 || VEC_length (gimple,
1815 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1816 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1818 do_versioning = false;
1819 break;
1822 stmt = DR_STMT (dr);
1823 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1824 gcc_assert (vectype);
1826 /* The rightmost bits of an aligned address must be zeros.
1827 Construct the mask needed for this test. For example,
1828 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1829 mask must be 15 = 0xf. */
1830 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
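 /* Illustrative sketch of the eventual runtime guard (an assumption for
 exposition, not code taken from this file): for V4SI the versioned
 loop is conceptually guarded by
 if (((addr_1 | addr_2 | ...) & 15) == 0)
 goto vectorized_version;
 else
 goto scalar_version;
 i.e. every address recorded in LOOP_VINFO_MAY_MISALIGN_STMTS must have
 its low four bits clear. */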
1832 /* FORNOW: use the same mask to test all potentially unaligned
1833 references in the loop. The vectorizer currently supports
1834 a single vector size, see the reference to
1835 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1836 vectorization factor is computed. */
1837 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1838 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1839 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1840 VEC_safe_push (gimple, heap,
1841 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1842 DR_STMT (dr));
1846 /* Versioning requires at least one misaligned data reference. */
1847 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1848 do_versioning = false;
1849 else if (!do_versioning)
1850 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1853 if (do_versioning)
1855 VEC(gimple,heap) *may_misalign_stmts
1856 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1857 gimple stmt;
1859 /* It can now be assumed that the data references in the statements
1860 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1861 of the loop being vectorized. */
1862 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1864 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1865 dr = STMT_VINFO_DATA_REF (stmt_info);
1866 SET_DR_MISALIGNMENT (dr, 0);
1867 if (vect_print_dump_info (REPORT_ALIGNMENT))
1868 fprintf (vect_dump, "Alignment of access forced using versioning.");
1871 if (vect_print_dump_info (REPORT_DETAILS))
1872 fprintf (vect_dump, "Versioning for alignment will be applied.");
1874 /* Peeling and versioning can't be done together at this time. */
1875 gcc_assert (! (do_peeling && do_versioning));
1877 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1878 gcc_assert (stat);
1879 return stat;
1882 /* This point is reached if neither peeling nor versioning is being done. */
1883 gcc_assert (! (do_peeling || do_versioning));
1885 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1886 return stat;
1890 /* Function vect_find_same_alignment_drs.
1892 Update group and alignment relations according to the chosen
1893 vectorization factor. */
1895 static void
1896 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1897 loop_vec_info loop_vinfo)
1899 unsigned int i;
1900 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1901 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1902 struct data_reference *dra = DDR_A (ddr);
1903 struct data_reference *drb = DDR_B (ddr);
1904 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1905 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1906 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1907 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1908 lambda_vector dist_v;
1909 unsigned int loop_depth;
1911 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1912 return;
1914 if (dra == drb)
1915 return;
1917 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1918 return;
1920 /* Loop-based vectorization and known data dependence. */
1921 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1922 return;
1924 /* Data-dependence analysis reports a distance vector of zero
1925 for data-references that overlap only in the first iteration
1926 but have steps of different sign (see PR45764).
1927 So as a sanity check require equal DR_STEP. */
1928 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1929 return;
1931 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1932 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1934 int dist = dist_v[loop_depth];
1936 if (vect_print_dump_info (REPORT_DR_DETAILS))
1937 fprintf (vect_dump, "dependence distance = %d.", dist);
1939 /* Same loop iteration. */
1940 if (dist == 0
1941 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1943 /* References with distance zero, or a multiple of VF with equal element sizes, have the same alignment. */
1944 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1945 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1946 if (vect_print_dump_info (REPORT_ALIGNMENT))
1947 fprintf (vect_dump, "accesses have the same alignment.");
1948 if (vect_print_dump_info (REPORT_DR_DETAILS))
1950 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1951 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1952 fprintf (vect_dump, " and ");
1953 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1960 /* Function vect_analyze_data_refs_alignment
1962 Analyze the alignment of the data-references in the loop.
1963 Return FALSE if a data reference is found that cannot be vectorized. */
1965 bool
1966 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1967 bb_vec_info bb_vinfo)
1969 if (vect_print_dump_info (REPORT_DETAILS))
1970 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1972 /* Mark groups of data references with same alignment using
1973 data dependence information. */
1974 if (loop_vinfo)
1976 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1977 struct data_dependence_relation *ddr;
1978 unsigned int i;
1980 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
1981 vect_find_same_alignment_drs (ddr, loop_vinfo);
1984 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1986 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1987 fprintf (vect_dump,
1988 "not vectorized: can't calculate alignment for data ref.");
1989 return false;
1992 return true;
1996 /* Analyze groups of strided accesses: check that DR belongs to a group of
1997 strided accesses of legal size, step, etc. Detect gaps, single element
1998 interleaving, and other special cases. Set strided access info.
1999 Collect groups of strided stores for further use in SLP analysis. */
2001 static bool
2002 vect_analyze_group_access (struct data_reference *dr)
2004 tree step = DR_STEP (dr);
2005 tree scalar_type = TREE_TYPE (DR_REF (dr));
2006 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2007 gimple stmt = DR_STMT (dr);
2008 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2009 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2010 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2011 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2012 HOST_WIDE_INT stride;
2013 bool slp_impossible = false;
2015 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2016 interleaving group (including gaps). */
2017 stride = dr_step / type_size;
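 /* For instance (illustration only): a 4-byte int accessed with a 16-byte
 step gives stride = 16 / 4 = 4, i.e. an interleaving group of four
 element slots (including any gaps) per scalar iteration. */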
2019 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2020 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2022 /* Check whether this DR is part of an interleaving group, and is the single
2023 element of the group that is accessed in the loop. */
2025 /* Gaps are supported only for loads. STEP must be a multiple of the type
2026 size. The size of the group must be a power of 2. */
2027 if (DR_IS_READ (dr)
2028 && (dr_step % type_size) == 0
2029 && stride > 0
2030 && exact_log2 (stride) != -1)
2032 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2033 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2034 if (vect_print_dump_info (REPORT_DR_DETAILS))
2036 fprintf (vect_dump, "Detected single element interleaving ");
2037 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2038 fprintf (vect_dump, " step ");
2039 print_generic_expr (vect_dump, step, TDF_SLIM);
2041 return true;
2044 if (vect_print_dump_info (REPORT_DETAILS))
2046 fprintf (vect_dump, "not consecutive access ");
2047 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2050 if (bb_vinfo)
2052 /* Mark the statement as unvectorizable. */
2053 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2054 return true;
2057 return false;
2060 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2062 /* First stmt in the interleaving chain. Check the chain. */
2063 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2064 struct data_reference *data_ref = dr;
2065 unsigned int count = 1;
2066 tree next_step;
2067 tree prev_init = DR_INIT (data_ref);
2068 gimple prev = stmt;
2069 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2071 while (next)
2073 /* Skip same data-refs. In case two or more stmts share the same
2074 data-ref (supported only for loads), we vectorize only the first
2075 stmt, and the rest get their vectorized loads from the first
2076 one. */
2077 if (!tree_int_cst_compare (DR_INIT (data_ref),
2078 DR_INIT (STMT_VINFO_DATA_REF (
2079 vinfo_for_stmt (next)))))
2081 if (DR_IS_WRITE (data_ref))
2083 if (vect_print_dump_info (REPORT_DETAILS))
2084 fprintf (vect_dump, "Two store stmts share the same dr.");
2085 return false;
2088 /* Check that there are no load-store dependencies for these loads,
2089 to prevent a load-store-load case on the same location. */
2090 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2091 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2093 if (vect_print_dump_info (REPORT_DETAILS))
2094 fprintf (vect_dump,
2095 "READ_WRITE dependence in interleaving.");
2096 return false;
2099 /* For loads, reuse the vectorized data-ref load of the earlier stmt. */
2100 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2102 prev = next;
2103 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2104 continue;
2106 prev = next;
2108 /* Check that all the accesses have the same STEP. */
2109 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2110 if (tree_int_cst_compare (step, next_step))
2112 if (vect_print_dump_info (REPORT_DETAILS))
2113 fprintf (vect_dump, "not consecutive access in interleaving");
2114 return false;
2117 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2118 /* Check that the distance between two accesses is equal to the type
2119 size. Otherwise, we have gaps. */
2120 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2121 - TREE_INT_CST_LOW (prev_init)) / type_size;
2122 if (diff != 1)
2124 /* FORNOW: SLP of accesses with gaps is not supported. */
2125 slp_impossible = true;
2126 if (DR_IS_WRITE (data_ref))
2128 if (vect_print_dump_info (REPORT_DETAILS))
2129 fprintf (vect_dump, "interleaved store with gaps");
2130 return false;
2133 gaps += diff - 1;
2136 /* Store the gap from the previous member of the group. If there is no
2137 gap in the access, DR_GROUP_GAP is always 1. */
2138 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2140 prev_init = DR_INIT (data_ref);
2141 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2142 /* Count the number of data-refs in the chain. */
2143 count++;
2146 /* COUNT is the number of accesses found; we multiply it by the size of
2147 the type to get COUNT_IN_BYTES. */
2148 count_in_bytes = type_size * count;
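 /* Hypothetical example for the checks below (not from the original
 source): three consecutive 4-byte loads give count = 3, gaps = 0 and
 count_in_bytes = 12. With a step of 16 the first check passes
 (16 is not less than 12 + 0), and since 16 != 12 this is a load group
 with DR_GROUP_GAP = 16/4 - 3 = 1 unused element at its end. */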
2150 /* Check that the size of the interleaving (including gaps) is not
2151 greater than STEP. */
2152 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2154 if (vect_print_dump_info (REPORT_DETAILS))
2156 fprintf (vect_dump, "interleaving size is greater than step for ");
2157 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2159 return false;
2162 /* Check that the size of the interleaving is equal to STEP for stores,
2163 i.e., that there are no gaps. */
2164 if (dr_step && dr_step != count_in_bytes)
2166 if (DR_IS_READ (dr))
2168 slp_impossible = true;
2169 /* There is a gap after the last load in the group. This gap is a
2170 difference between the stride and the number of elements. When
2171 there is no gap, this difference should be 0. */
2172 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2174 else
2176 if (vect_print_dump_info (REPORT_DETAILS))
2177 fprintf (vect_dump, "interleaved store with gaps");
2178 return false;
2182 /* Check that STEP is a multiple of type size. */
2183 if (dr_step && (dr_step % type_size) != 0)
2185 if (vect_print_dump_info (REPORT_DETAILS))
2187 fprintf (vect_dump, "step is not a multiple of type size: step ");
2188 print_generic_expr (vect_dump, step, TDF_SLIM);
2189 fprintf (vect_dump, " size ");
2190 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2191 TDF_SLIM);
2193 return false;
2196 /* FORNOW: we handle only interleaving that is a power of 2.
2197 We don't fail here if it may still be possible to vectorize the
2198 group using SLP. If not, the size of the group will be checked in
2199 vect_analyze_operations, and the vectorization will fail. */
2200 if (exact_log2 (stride) == -1)
2202 if (vect_print_dump_info (REPORT_DETAILS))
2203 fprintf (vect_dump, "interleaving is not a power of 2");
2205 if (slp_impossible)
2206 return false;
2209 if (stride == 0)
2210 stride = count;
2212 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2213 if (vect_print_dump_info (REPORT_DETAILS))
2214 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2216 /* SLP: create an SLP data structure for every interleaving group of
2217 stores for further analysis in vect_analyze_slp. */
2218 if (DR_IS_WRITE (dr) && !slp_impossible)
2220 if (loop_vinfo)
2221 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2222 stmt);
2223 if (bb_vinfo)
2224 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2225 stmt);
2229 return true;
2233 /* Analyze the access pattern of the data-reference DR.
2234 In case of non-consecutive accesses call vect_analyze_group_access() to
2235 analyze groups of strided accesses. */
2237 static bool
2238 vect_analyze_data_ref_access (struct data_reference *dr)
2240 tree step = DR_STEP (dr);
2241 tree scalar_type = TREE_TYPE (DR_REF (dr));
2242 gimple stmt = DR_STMT (dr);
2243 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2244 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2245 struct loop *loop = NULL;
2246 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2248 if (loop_vinfo)
2249 loop = LOOP_VINFO_LOOP (loop_vinfo);
2251 if (loop_vinfo && !step)
2253 if (vect_print_dump_info (REPORT_DETAILS))
2254 fprintf (vect_dump, "bad data-ref access in loop");
2255 return false;
2258 /* Don't allow invariant accesses in loops. */
2259 if (loop_vinfo && dr_step == 0)
2260 return false;
2262 if (loop && nested_in_vect_loop_p (loop, stmt))
2264 /* Interleaved accesses are not yet supported within outer-loop
2265 vectorization for references in the inner-loop. */
2266 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2268 /* For the rest of the analysis we use the outer-loop step. */
2269 step = STMT_VINFO_DR_STEP (stmt_info);
2270 dr_step = TREE_INT_CST_LOW (step);
2272 if (dr_step == 0)
2274 if (vect_print_dump_info (REPORT_ALIGNMENT))
2275 fprintf (vect_dump, "zero step in outer loop.");
2276 if (DR_IS_READ (dr))
2277 return true;
2278 else
2279 return false;
2283 /* Consecutive? */
2284 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2285 || (dr_step < 0
2286 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2288 /* Mark that it is not interleaving. */
2289 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2290 return true;
2293 if (loop && nested_in_vect_loop_p (loop, stmt))
2295 if (vect_print_dump_info (REPORT_ALIGNMENT))
2296 fprintf (vect_dump, "strided access in outer loop.");
2297 return false;
2300 /* Non-consecutive access - check whether it is part of an interleaving group. */
2301 return vect_analyze_group_access (dr);
2305 /* Function vect_analyze_data_ref_accesses.
2307 Analyze the access pattern of all the data references in the loop.
2309 FORNOW: the only access pattern that is considered vectorizable is a
2310 simple step 1 (consecutive) access.
2312 FORNOW: handle only arrays and pointer accesses. */
2314 bool
2315 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2317 unsigned int i;
2318 VEC (data_reference_p, heap) *datarefs;
2319 struct data_reference *dr;
2321 if (vect_print_dump_info (REPORT_DETAILS))
2322 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2324 if (loop_vinfo)
2325 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2326 else
2327 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2329 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2330 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2331 && !vect_analyze_data_ref_access (dr))
2333 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2334 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2336 if (bb_vinfo)
2338 /* Mark the statement as not vectorizable. */
2339 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2340 continue;
2342 else
2343 return false;
2346 return true;
2349 /* Function vect_prune_runtime_alias_test_list.
2351 Prune a list of ddrs to be tested at run-time by versioning for alias.
2352 Return FALSE if the resulting list of ddrs is longer than allowed by
2353 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2355 bool
2356 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2358 VEC (ddr_p, heap) * ddrs =
2359 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2360 unsigned i, j;
2362 if (vect_print_dump_info (REPORT_DETAILS))
2363 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2365 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2367 bool found;
2368 ddr_p ddr_i;
2370 ddr_i = VEC_index (ddr_p, ddrs, i);
2371 found = false;
2373 for (j = 0; j < i; j++)
2375 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2377 if (vect_vfa_range_equal (ddr_i, ddr_j))
2379 if (vect_print_dump_info (REPORT_DR_DETAILS))
2381 fprintf (vect_dump, "found equal ranges ");
2382 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2383 fprintf (vect_dump, ", ");
2384 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2385 fprintf (vect_dump, " and ");
2386 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2387 fprintf (vect_dump, ", ");
2388 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2390 found = true;
2391 break;
2395 if (found)
2397 VEC_ordered_remove (ddr_p, ddrs, i);
2398 continue;
2400 i++;
2403 if (VEC_length (ddr_p, ddrs) >
2404 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2406 if (vect_print_dump_info (REPORT_DR_DETAILS))
2408 fprintf (vect_dump,
2409 "disable versioning for alias - max number of generated "
2410 "checks exceeded.");
2413 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2415 return false;
2418 return true;
2422 /* Function vect_analyze_data_refs.
2424 Find all the data references in the loop or basic block.
2426 The general structure of the analysis of data refs in the vectorizer is as
2427 follows:
2428 1- vect_analyze_data_refs(loop/bb): call
2429 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2430 in the loop/bb and their dependences.
2431 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2432 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2433 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2437 bool
2438 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2439 bb_vec_info bb_vinfo,
2440 int *min_vf)
2442 struct loop *loop = NULL;
2443 basic_block bb = NULL;
2444 unsigned int i;
2445 VEC (data_reference_p, heap) *datarefs;
2446 struct data_reference *dr;
2447 tree scalar_type;
2448 bool res;
2450 if (vect_print_dump_info (REPORT_DETAILS))
2451 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2453 if (loop_vinfo)
2455 loop = LOOP_VINFO_LOOP (loop_vinfo);
2456 res = compute_data_dependences_for_loop
2457 (loop, true,
2458 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2459 &LOOP_VINFO_DATAREFS (loop_vinfo),
2460 &LOOP_VINFO_DDRS (loop_vinfo));
2462 if (!res)
2464 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2465 fprintf (vect_dump, "not vectorized: loop contains function calls"
2466 " or data references that cannot be analyzed");
2467 return false;
2470 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2472 else
2474 bb = BB_VINFO_BB (bb_vinfo);
2475 res = compute_data_dependences_for_bb (bb, true,
2476 &BB_VINFO_DATAREFS (bb_vinfo),
2477 &BB_VINFO_DDRS (bb_vinfo));
2478 if (!res)
2480 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2481 fprintf (vect_dump, "not vectorized: basic block contains function"
2482 " calls or data references that cannot be analyzed");
2483 return false;
2486 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2489 /* Go through the data-refs, check that the analysis succeeded. Update
2490 pointer from stmt_vec_info struct to DR and vectype. */
2492 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2494 gimple stmt;
2495 stmt_vec_info stmt_info;
2496 tree base, offset, init;
2497 int vf;
2499 if (!dr || !DR_REF (dr))
2501 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2502 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2503 return false;
2506 stmt = DR_STMT (dr);
2507 stmt_info = vinfo_for_stmt (stmt);
2509 /* Check that analysis of the data-ref succeeded. */
2510 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2511 || !DR_STEP (dr))
2513 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2515 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2516 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2519 if (bb_vinfo)
2521 /* Mark the statement as not vectorizable. */
2522 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2523 continue;
2525 else
2526 return false;
2529 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2531 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2532 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2533 "constant");
2534 if (bb_vinfo)
2536 /* Mark the statement as not vectorizable. */
2537 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2538 continue;
2540 else
2541 return false;
2544 base = unshare_expr (DR_BASE_ADDRESS (dr));
2545 offset = unshare_expr (DR_OFFSET (dr));
2546 init = unshare_expr (DR_INIT (dr));
2548 if (stmt_can_throw_internal (stmt))
2550 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2552 fprintf (vect_dump, "not vectorized: statement can throw an "
2553 "exception ");
2554 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2556 return false;
2559 /* Update DR field in stmt_vec_info struct. */
2561 /* If the dataref is in an inner-loop of the loop that is considered
2562 for vectorization, we also want to analyze the access relative to
2563 the outer-loop (DR contains information only relative to the
2564 inner-most enclosing loop). We do that by building a reference to the
2565 first location accessed by the inner-loop, and analyze it relative to
2566 the outer-loop. */
2567 if (loop && nested_in_vect_loop_p (loop, stmt))
2569 tree outer_step, outer_base, outer_init;
2570 HOST_WIDE_INT pbitsize, pbitpos;
2571 tree poffset;
2572 enum machine_mode pmode;
2573 int punsignedp, pvolatilep;
2574 affine_iv base_iv, offset_iv;
2575 tree dinit;
2577 /* Build a reference to the first location accessed by the
2578 inner-loop: *(BASE+INIT). (The first location is actually
2579 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2580 tree inner_base = build_fold_indirect_ref
2581 (fold_build2 (POINTER_PLUS_EXPR,
2582 TREE_TYPE (base), base,
2583 fold_convert (sizetype, init)));
2585 if (vect_print_dump_info (REPORT_DETAILS))
2587 fprintf (vect_dump, "analyze in outer-loop: ");
2588 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2591 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2592 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2593 gcc_assert (outer_base != NULL_TREE);
2595 if (pbitpos % BITS_PER_UNIT != 0)
2597 if (vect_print_dump_info (REPORT_DETAILS))
2598 fprintf (vect_dump, "failed: bit offset alignment.\n");
2599 return false;
2602 outer_base = build_fold_addr_expr (outer_base);
2603 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2604 &base_iv, false))
2606 if (vect_print_dump_info (REPORT_DETAILS))
2607 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2608 return false;
2611 if (offset)
2613 if (poffset)
2614 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2615 poffset);
2616 else
2617 poffset = offset;
2620 if (!poffset)
2622 offset_iv.base = ssize_int (0);
2623 offset_iv.step = ssize_int (0);
2625 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2626 &offset_iv, false))
2628 if (vect_print_dump_info (REPORT_DETAILS))
2629 fprintf (vect_dump, "evolution of offset is not affine.\n");
2630 return false;
2633 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2634 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2635 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2636 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2637 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2639 outer_step = size_binop (PLUS_EXPR,
2640 fold_convert (ssizetype, base_iv.step),
2641 fold_convert (ssizetype, offset_iv.step));
2643 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2644 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2645 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2646 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2647 STMT_VINFO_DR_OFFSET (stmt_info) =
2648 fold_convert (ssizetype, offset_iv.base);
2649 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2650 size_int (highest_pow2_factor (offset_iv.base));
2652 if (vect_print_dump_info (REPORT_DETAILS))
2654 fprintf (vect_dump, "\touter base_address: ");
2655 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2656 fprintf (vect_dump, "\n\touter offset from base address: ");
2657 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2658 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2659 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2660 fprintf (vect_dump, "\n\touter step: ");
2661 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2662 fprintf (vect_dump, "\n\touter aligned to: ");
2663 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2667 if (STMT_VINFO_DATA_REF (stmt_info))
2669 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2671 fprintf (vect_dump,
2672 "not vectorized: more than one data ref in stmt: ");
2673 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2675 return false;
2678 STMT_VINFO_DATA_REF (stmt_info) = dr;
2680 /* Set vectype for STMT. */
2681 scalar_type = TREE_TYPE (DR_REF (dr));
2682 STMT_VINFO_VECTYPE (stmt_info) =
2683 get_vectype_for_scalar_type (scalar_type);
2684 if (!STMT_VINFO_VECTYPE (stmt_info))
2686 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2688 fprintf (vect_dump,
2689 "not vectorized: no vectype for stmt: ");
2690 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2691 fprintf (vect_dump, " scalar_type: ");
2692 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2695 if (bb_vinfo)
2697 /* Mark the statement as not vectorizable. */
2698 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2699 continue;
2701 else
2702 return false;
2705 /* Adjust the minimal vectorization factor according to the
2706 vector type. */
2707 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2708 if (vf > *min_vf)
2709 *min_vf = vf;
2712 return true;
2716 /* Function vect_get_new_vect_var.
2718 Returns a name for a new variable. The current naming scheme prepends the
2719 prefix "vect_", "stmp_" or "vect_p" (depending on the value of VAR_KIND)
2720 to NAME if it is provided, and otherwise uses the prefix alone for the
2721 new variable's name. */
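 /* For instance (an illustrative call, not taken from this file):
 vect_get_new_vect_var (ptr_type, vect_pointer_var, "a")
 is expected to create a temporary whose declared name starts with
 "vect_pa", while passing NULL for NAME falls back to the bare
 "vect_p" prefix. */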
2723 tree
2724 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2726 const char *prefix;
2727 tree new_vect_var;
2729 switch (var_kind)
2731 case vect_simple_var:
2732 prefix = "vect_";
2733 break;
2734 case vect_scalar_var:
2735 prefix = "stmp_";
2736 break;
2737 case vect_pointer_var:
2738 prefix = "vect_p";
2739 break;
2740 default:
2741 gcc_unreachable ();
2744 if (name)
2746 char* tmp = concat (prefix, name, NULL);
2747 new_vect_var = create_tmp_var (type, tmp);
2748 free (tmp);
2750 else
2751 new_vect_var = create_tmp_var (type, prefix);
2753 /* Mark vector typed variable as a gimple register variable. */
2754 if (TREE_CODE (type) == VECTOR_TYPE)
2755 DECL_GIMPLE_REG_P (new_vect_var) = true;
2757 return new_vect_var;
2761 /* Function vect_create_addr_base_for_vector_ref.
2763 Create an expression that computes the address of the first memory location
2764 that will be accessed for a data reference.
2766 Input:
2767 STMT: The statement containing the data reference.
2768 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2769 OFFSET: Optional. If supplied, it is added to the initial address.
2770 LOOP: Specify relative to which loop-nest should the address be computed.
2771 For example, when the dataref is in an inner-loop nested in an
2772 outer-loop that is now being vectorized, LOOP can be either the
2773 outer-loop, or the inner-loop. The first memory location accessed
2774 by the following dataref ('in' points to short):
2776 for (i=0; i<N; i++)
2777 for (j=0; j<M; j++)
2778 s += in[i+j]
2780 is as follows:
2781 if LOOP=i_loop: &in (relative to i_loop)
2782 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2784 Output:
2785 1. Return an SSA_NAME whose value is the address of the memory location of
2786 the first vector of the data reference.
2787 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2788 these statement(s) which define the returned SSA_NAME.
2790 FORNOW: We are only handling array accesses with step 1. */
2792 tree
2793 vect_create_addr_base_for_vector_ref (gimple stmt,
2794 gimple_seq *new_stmt_list,
2795 tree offset,
2796 struct loop *loop)
2798 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2799 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2800 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2801 tree base_name;
2802 tree data_ref_base_var;
2803 tree vec_stmt;
2804 tree addr_base, addr_expr;
2805 tree dest;
2806 gimple_seq seq = NULL;
2807 tree base_offset = unshare_expr (DR_OFFSET (dr));
2808 tree init = unshare_expr (DR_INIT (dr));
2809 tree vect_ptr_type;
2810 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2811 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2812 tree base;
2814 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2816 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2818 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2820 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2821 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2822 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2825 if (loop_vinfo)
2826 base_name = build_fold_indirect_ref (data_ref_base);
2827 else
2829 base_offset = ssize_int (0);
2830 init = ssize_int (0);
2831 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2834 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2835 add_referenced_var (data_ref_base_var);
2836 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2837 data_ref_base_var);
2838 gimple_seq_add_seq (new_stmt_list, seq);
2840 /* Create base_offset */
2841 base_offset = size_binop (PLUS_EXPR,
2842 fold_convert (sizetype, base_offset),
2843 fold_convert (sizetype, init));
2844 dest = create_tmp_var (sizetype, "base_off");
2845 add_referenced_var (dest);
2846 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2847 gimple_seq_add_seq (new_stmt_list, seq);
2849 if (offset)
2851 tree tmp = create_tmp_var (sizetype, "offset");
2853 add_referenced_var (tmp);
2854 offset = fold_build2 (MULT_EXPR, sizetype,
2855 fold_convert (sizetype, offset), step);
2856 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2857 base_offset, offset);
2858 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2859 gimple_seq_add_seq (new_stmt_list, seq);
2862 /* base + base_offset */
2863 if (loop_vinfo)
2864 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2865 data_ref_base, base_offset);
2866 else
2868 addr_base = build1 (ADDR_EXPR,
2869 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2870 unshare_expr (DR_REF (dr)));
2873 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2874 base = get_base_address (DR_REF (dr));
2875 if (base
2876 && TREE_CODE (base) == MEM_REF)
2877 vect_ptr_type
2878 = build_qualified_type (vect_ptr_type,
2879 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2881 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2882 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2883 get_name (base_name));
2884 add_referenced_var (addr_expr);
2885 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2886 gimple_seq_add_seq (new_stmt_list, seq);
2888 if (DR_PTR_INFO (dr)
2889 && TREE_CODE (vec_stmt) == SSA_NAME)
2891 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2892 if (offset)
2894 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2895 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2899 if (vect_print_dump_info (REPORT_DETAILS))
2901 fprintf (vect_dump, "created ");
2902 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2905 return vec_stmt;
2909 /* Function vect_create_data_ref_ptr.
2911 Create a new pointer to vector type (vp), that points to the first location
2912 accessed in the loop by STMT, along with the def-use update chain to
2913 appropriately advance the pointer through the loop iterations. Also set
2914 aliasing information for the pointer. This vector pointer is used by the
2915 callers to this function to create a memory reference expression for vector
2916 load/store access.
2918 Input:
2919 1. STMT: a stmt that references memory. Expected to be of the form
2920 GIMPLE_ASSIGN <name, data-ref> or
2921 GIMPLE_ASSIGN <data-ref, name>.
2922 2. AT_LOOP: the loop where the vector memref is to be created.
2923 3. OFFSET (optional): an offset to be added to the initial address accessed
2924 by the data-ref in STMT.
2925 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2926 pointing to the initial address.
2927 5. TYPE: if not NULL indicates the required type of the data-ref.
2929 Output:
2930 1. Declare a new ptr to vector_type, and have it point to the base of the
2931 data reference (the initial address accessed by the data reference).
2932 For example, for vector of type V8HI, the following code is generated:
2934 v8hi *vp;
2935 vp = (v8hi *)initial_address;
2937 if OFFSET is not supplied:
2938 initial_address = &a[init];
2939 if OFFSET is supplied:
2940 initial_address = &a[init + OFFSET];
2942 Return the initial_address in INITIAL_ADDRESS.
2944 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2945 update the pointer in each iteration of the loop.
2947 Return the increment stmt that updates the pointer in PTR_INCR.
2949 3. Set INV_P to true if the access pattern of the data reference in the
2950 vectorized loop is invariant. Set it to false otherwise.
2952 4. Return the pointer. */
2954 tree
2955 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2956 tree offset, tree *initial_address, gimple *ptr_incr,
2957 bool only_init, bool *inv_p)
2959 tree base_name;
2960 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2961 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2962 struct loop *loop = NULL;
2963 bool nested_in_vect_loop = false;
2964 struct loop *containing_loop = NULL;
2965 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2966 tree vect_ptr_type;
2967 tree vect_ptr;
2968 tree new_temp;
2969 gimple vec_stmt;
2970 gimple_seq new_stmt_list = NULL;
2971 edge pe = NULL;
2972 basic_block new_bb;
2973 tree vect_ptr_init;
2974 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2975 tree vptr;
2976 gimple_stmt_iterator incr_gsi;
2977 bool insert_after;
2978 bool negative;
2979 tree indx_before_incr, indx_after_incr;
2980 gimple incr;
2981 tree step;
2982 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2983 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2984 tree base;
2986 if (loop_vinfo)
2988 loop = LOOP_VINFO_LOOP (loop_vinfo);
2989 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2990 containing_loop = (gimple_bb (stmt))->loop_father;
2991 pe = loop_preheader_edge (loop);
2993 else
2995 gcc_assert (bb_vinfo);
2996 only_init = true;
2997 *ptr_incr = NULL;
3000 /* Check the step (evolution) of the load in LOOP, and record
3001 whether it's invariant. */
3002 if (nested_in_vect_loop)
3003 step = STMT_VINFO_DR_STEP (stmt_info);
3004 else
3005 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3007 if (tree_int_cst_compare (step, size_zero_node) == 0)
3008 *inv_p = true;
3009 else
3010 *inv_p = false;
3011 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3013 /* Create an expression for the first address accessed by this load
3014 in LOOP. */
3015 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3017 if (vect_print_dump_info (REPORT_DETAILS))
3019 tree data_ref_base = base_name;
3020 fprintf (vect_dump, "create vector-pointer variable to type: ");
3021 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3022 if (TREE_CODE (data_ref_base) == VAR_DECL
3023 || TREE_CODE (data_ref_base) == ARRAY_REF)
3024 fprintf (vect_dump, " vectorizing an array ref: ");
3025 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3026 fprintf (vect_dump, " vectorizing a record based array ref: ");
3027 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3028 fprintf (vect_dump, " vectorizing a pointer ref: ");
3029 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3032 /* (1) Create the new vector-pointer variable. */
3033 vect_ptr_type = build_pointer_type (vectype);
3034 base = get_base_address (DR_REF (dr));
3035 if (base
3036 && TREE_CODE (base) == MEM_REF)
3037 vect_ptr_type
3038 = build_qualified_type (vect_ptr_type,
3039 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3040 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3041 get_name (base_name));
3043 /* Vector types inherit the alias set of their component type by default so
3044 we need to use a ref-all pointer if the data reference does not conflict
3045 with the created vector data reference because it is not addressable. */
3046 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3047 get_alias_set (DR_REF (dr))))
3049 vect_ptr_type
3050 = build_pointer_type_for_mode (vectype,
3051 TYPE_MODE (vect_ptr_type), true);
3052 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3053 get_name (base_name));
3056 /* Likewise for any of the data references in the stmt group. */
3057 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
3059 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
3062 tree lhs = gimple_assign_lhs (orig_stmt);
3063 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3064 get_alias_set (lhs)))
3066 vect_ptr_type
3067 = build_pointer_type_for_mode (vectype,
3068 TYPE_MODE (vect_ptr_type), true);
3069 vect_ptr
3070 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3071 get_name (base_name));
3072 break;
3075 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
3077 while (orig_stmt);
3080 add_referenced_var (vect_ptr);
3082 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3083 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3084 def-use update cycles for the pointer: one relative to the outer-loop
3085 (LOOP), which is what steps (3) and (4) below do. The other is relative
3086 to the inner-loop (which is the inner-most loop containing the dataref),
3087 and this is done by step (5) below.
3089 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3090 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3091 redundant. Steps (3),(4) create the following:
3093 vp0 = &base_addr;
3094 LOOP: vp1 = phi(vp0,vp2)
3097 vp2 = vp1 + step
3098 goto LOOP
3100 If there is an inner-loop nested in loop, then step (5) will also be
3101 applied, and an additional update in the inner-loop will be created:
3103 vp0 = &base_addr;
3104 LOOP: vp1 = phi(vp0,vp2)
3106 inner: vp3 = phi(vp1,vp4)
3107 vp4 = vp3 + inner_step
3108 if () goto inner
3110 vp2 = vp1 + step
3111 if () goto LOOP */
3113 /* (2) Calculate the initial address of the vector-pointer, and set
3114 the vector-pointer to point to it before the loop. */
3116 /* Create: &(base[init_val+offset]) in the loop preheader. */
3118 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3119 offset, loop);
3120 if (new_stmt_list)
3122 if (pe)
3124 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3125 gcc_assert (!new_bb);
3127 else
3128 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
3131 *initial_address = new_temp;
3133 /* Create: p = (vectype *) initial_base */
3134 if (TREE_CODE (new_temp) != SSA_NAME
3135 || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
3137 vec_stmt = gimple_build_assign (vect_ptr,
3138 fold_convert (vect_ptr_type, new_temp));
3139 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
3140 /* Copy the points-to information if it exists. */
3141 if (DR_PTR_INFO (dr))
3142 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
3143 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
3144 if (pe)
3146 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3147 gcc_assert (!new_bb);
3149 else
3150 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
3152 else
3153 vect_ptr_init = new_temp;
3155 /* (3) Handle the updating of the vector-pointer inside the loop.
3156 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3157 inner-loop nested in LOOP (during outer-loop vectorization). */
3159 /* No update in loop is required. */
3160 if (only_init && (!loop_vinfo || at_loop == loop))
3161 vptr = vect_ptr_init;
3162 else
3164 /* The step of the vector pointer is the Vector Size. */
3165 tree step = TYPE_SIZE_UNIT (vectype);
3166 /* One exception to the above is when the scalar step of the load in
3167 LOOP is zero. In this case the step here is also zero. */
3168 if (*inv_p)
3169 step = size_zero_node;
3170 else if (negative)
3171 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3173 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3175 create_iv (vect_ptr_init,
3176 fold_convert (vect_ptr_type, step),
3177 vect_ptr, loop, &incr_gsi, insert_after,
3178 &indx_before_incr, &indx_after_incr);
3179 incr = gsi_stmt (incr_gsi);
3180 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3182 /* Copy the points-to information if it exists. */
3183 if (DR_PTR_INFO (dr))
3185 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3186 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3188 if (ptr_incr)
3189 *ptr_incr = incr;
3191 vptr = indx_before_incr;
3194 if (!nested_in_vect_loop || only_init)
3195 return vptr;
3198 /* (4) Handle the updating of the vector-pointer inside the inner-loop
3199 nested in LOOP, if it exists. */
3201 gcc_assert (nested_in_vect_loop);
3202 if (!only_init)
3204 standard_iv_increment_position (containing_loop, &incr_gsi,
3205 &insert_after);
3206 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
3207 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3208 &indx_after_incr);
3209 incr = gsi_stmt (incr_gsi);
3210 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3212 /* Copy the points-to information if it exists. */
3213 if (DR_PTR_INFO (dr))
3215 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3216 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3218 if (ptr_incr)
3219 *ptr_incr = incr;
3221 return indx_before_incr;
3223 else
3224 gcc_unreachable ();
3228 /* Function bump_vector_ptr
3230 Increment a pointer (to a vector type) by vector-size. If requested,
3231 i.e. if PTR_INCR is given, then also connect the new increment stmt
3232 to the existing def-use update-chain of the pointer, by modifying
3233 the PTR_INCR as illustrated below:
3235 The pointer def-use update-chain before this function:
3236 DATAREF_PTR = phi (p_0, p_2)
3237 ....
3238 PTR_INCR: p_2 = DATAREF_PTR + step
3240 The pointer def-use update-chain after this function:
3241 DATAREF_PTR = phi (p_0, p_2)
3242 ....
3243 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3244 ....
3245 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3247 Input:
3248 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3249 in the loop.
3250 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3251 the loop. The increment amount across iterations is expected
3252 to be vector_size.
3253 BSI - location where the new update stmt is to be placed.
3254 STMT - the original scalar memory-access stmt that is being vectorized.
3255 BUMP - optional. The offset by which to bump the pointer. If not given,
3256 the offset is assumed to be vector_size.
3258 Output: Return NEW_DATAREF_PTR as illustrated above.
3262 tree
3263 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3264 gimple stmt, tree bump)
3266 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3267 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3268 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3269 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3270 tree update = TYPE_SIZE_UNIT (vectype);
3271 gimple incr_stmt;
3272 ssa_op_iter iter;
3273 use_operand_p use_p;
3274 tree new_dataref_ptr;
3276 if (bump)
3277 update = bump;
3279 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3280 dataref_ptr, update);
3281 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3282 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3283 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3285 /* Copy the points-to information if it exists. */
3286 if (DR_PTR_INFO (dr))
3288 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3289 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3290 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3293 if (!ptr_incr)
3294 return new_dataref_ptr;
3296 /* Update the vector-pointer's cross-iteration increment. */
3297 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3299 tree use = USE_FROM_PTR (use_p);
3301 if (use == dataref_ptr)
3302 SET_USE (use_p, new_dataref_ptr);
3303 else
3304 gcc_assert (tree_int_cst_compare (use, update) == 0);
3307 return new_dataref_ptr;
3311 /* Function vect_create_destination_var.
3313 Create a new temporary of type VECTYPE. */
3315 tree
3316 vect_create_destination_var (tree scalar_dest, tree vectype)
3318 tree vec_dest;
3319 const char *new_name;
3320 tree type;
3321 enum vect_var_kind kind;
3323 kind = vectype ? vect_simple_var : vect_scalar_var;
3324 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3326 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3328 new_name = get_name (scalar_dest);
3329 if (!new_name)
3330 new_name = "var_";
3331 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3332 add_referenced_var (vec_dest);
3334 return vec_dest;
3337 /* Function vect_strided_store_supported.
3339 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3340 and FALSE otherwise. */
3342 bool
3343 vect_strided_store_supported (tree vectype)
3345 optab interleave_high_optab, interleave_low_optab;
3346 enum machine_mode mode;
3348 mode = TYPE_MODE (vectype);
3350 /* Check that the operation is supported. */
3351 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3352 vectype, optab_default);
3353 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3354 vectype, optab_default);
3355 if (!interleave_high_optab || !interleave_low_optab)
3357 if (vect_print_dump_info (REPORT_DETAILS))
3358 fprintf (vect_dump, "no optab for interleave.");
3359 return false;
3362 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3363 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3365 if (vect_print_dump_info (REPORT_DETAILS))
3366 fprintf (vect_dump, "interleave op not supported by target.");
3367 return false;
3370 return true;
3374 /* Function vect_permute_store_chain.
3376 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3377 a power of 2, generate interleave_high/low stmts to reorder the data
3378 correctly for the stores. Return the final references for stores in
3379 RESULT_CHAIN.
3381 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3382 The input is 4 vectors each containing 8 elements. We assign a number to
3383 each element, the input sequence is:
3385 1st vec: 0 1 2 3 4 5 6 7
3386 2nd vec: 8 9 10 11 12 13 14 15
3387 3rd vec: 16 17 18 19 20 21 22 23
3388 4th vec: 24 25 26 27 28 29 30 31
3390 The output sequence should be:
3392 1st vec: 0 8 16 24 1 9 17 25
3393 2nd vec: 2 10 18 26 3 11 19 27
3394 3rd vec: 4 12 20 28 5 13 21 29
3395 4th vec: 6 14 22 30 7 15 23 31
3397 i.e., we interleave the contents of the four vectors in their order.
3399 We use interleave_high/low instructions to create such output. The input of
3400 each interleave_high/low operation is two vectors:
3401 1st vec 2nd vec
3402 0 1 2 3 4 5 6 7
3403 the even elements of the result vector are obtained left-to-right from the
3404 high/low elements of the first vector. The odd elements of the result are
3405 obtained left-to-right from the high/low elements of the second vector.
3406 The output of interleave_high will be: 0 4 1 5
3407 and of interleave_low: 2 6 3 7
3410 The permutation is done in log LENGTH stages. In each stage interleave_high
3411 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3412 where the first argument is taken from the first half of DR_CHAIN and the
3413 second argument from its second half.
3414 In our example,
3416 I1: interleave_high (1st vec, 3rd vec)
3417 I2: interleave_low (1st vec, 3rd vec)
3418 I3: interleave_high (2nd vec, 4th vec)
3419 I4: interleave_low (2nd vec, 4th vec)
3421 The output for the first stage is:
3423 I1: 0 16 1 17 2 18 3 19
3424 I2: 4 20 5 21 6 22 7 23
3425 I3: 8 24 9 25 10 26 11 27
3426 I4: 12 28 13 29 14 30 15 31
3428 The output of the second stage, i.e. the final result is:
3430 I1: 0 8 16 24 1 9 17 25
3431 I2: 2 10 18 26 3 11 19 27
3432 I3: 4 12 20 28 5 13 21 29
3433 I4: 6 14 22 30 7 15 23 31. */
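 /* Concretely (an illustrative sketch, not text generated by this file):
 for each pair the loop below emits two assignments of roughly the form
 vect_inter_high.N = VEC_INTERLEAVE_HIGH_EXPR <vect1, vect2>;
 vect_inter_low.N = VEC_INTERLEAVE_LOW_EXPR <vect1, vect2>;
 (with the two tree codes swapped on little-endian targets), and stores
 the results back into *RESULT_CHAIN at positions 2*j and 2*j + 1. */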
3435 bool
3436 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3437 unsigned int length,
3438 gimple stmt,
3439 gimple_stmt_iterator *gsi,
3440 VEC(tree,heap) **result_chain)
3442 tree perm_dest, vect1, vect2, high, low;
3443 gimple perm_stmt;
3444 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3445 int i;
3446 unsigned int j;
3447 enum tree_code high_code, low_code;
3449 /* Check that the operation is supported. */
3450 if (!vect_strided_store_supported (vectype))
3451 return false;
3453 *result_chain = VEC_copy (tree, heap, dr_chain);
3455 for (i = 0; i < exact_log2 (length); i++)
3457 for (j = 0; j < length/2; j++)
3459 vect1 = VEC_index (tree, dr_chain, j);
3460 vect2 = VEC_index (tree, dr_chain, j+length/2);
3462 /* Create interleaving stmt:
3463 in the case of big endian:
3464 high = interleave_high (vect1, vect2)
3465 and in the case of little endian:
3466 high = interleave_low (vect1, vect2). */
3467 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3468 DECL_GIMPLE_REG_P (perm_dest) = 1;
3469 add_referenced_var (perm_dest);
3470 if (BYTES_BIG_ENDIAN)
3472 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3473 low_code = VEC_INTERLEAVE_LOW_EXPR;
3475 else
3477 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3478 high_code = VEC_INTERLEAVE_LOW_EXPR;
3480 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3481 vect1, vect2);
3482 high = make_ssa_name (perm_dest, perm_stmt);
3483 gimple_assign_set_lhs (perm_stmt, high);
3484 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3485 VEC_replace (tree, *result_chain, 2*j, high);
3487 /* Create interleaving stmt:
3488 in the case of big endian:
3489 low = interleave_low (vect1, vect2)
3490 and in the case of little endian:
3491 low = interleave_high (vect1, vect2). */
3492 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3493 DECL_GIMPLE_REG_P (perm_dest) = 1;
3494 add_referenced_var (perm_dest);
3495 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3496 vect1, vect2);
3497 low = make_ssa_name (perm_dest, perm_stmt);
3498 gimple_assign_set_lhs (perm_stmt, low);
3499 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3500 VEC_replace (tree, *result_chain, 2*j+1, low);
3502 dr_chain = VEC_copy (tree, heap, *result_chain);
3504 return true;
3507 /* Function vect_setup_realignment
3509 This function is called when vectorizing an unaligned load using
3510 the dr_explicit_realign[_optimized] scheme.
3511 This function generates the following code at the loop prolog:
3513 p = initial_addr;
3514 x msq_init = *(floor(p)); # prolog load
3515 realignment_token = call target_builtin;
3516 loop:
3517 x msq = phi (msq_init, ---)
3519 The stmts marked with x are generated only for the case of
3520 dr_explicit_realign_optimized.
3522 The code above sets up a new (vector) pointer, pointing to the first
3523 location accessed by STMT, and a "floor-aligned" load using that pointer.
3524 It also generates code to compute the "realignment-token" (if the relevant
3525 target hook was defined), and creates a phi-node at the loop-header bb
3526 whose arguments are the result of the prolog-load (created by this
3527 function) and the result of a load that takes place in the loop (to be
3528 created by the caller to this function).
3530 For the case of dr_explicit_realign_optimized:
3531 The caller to this function uses the phi-result (msq) to create the
3532 realignment code inside the loop, and sets up the missing phi argument,
3533 as follows:
3534 loop:
3535 msq = phi (msq_init, lsq)
3536 lsq = *(floor(p')); # load in loop
3537 result = realign_load (msq, lsq, realignment_token);
3539 For the case of dr_explicit_realign:
3540 loop:
3541 msq = *(floor(p)); # load in loop
3542 p' = p + (VS-1);
3543 lsq = *(floor(p')); # load in loop
3544 result = realign_load (msq, lsq, realignment_token);
3546 Input:
3547 STMT - (scalar) load stmt to be vectorized. This load accesses
3548 a memory location that may be unaligned.
3549 BSI - place where new code is to be inserted.
3550 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3551 is used.
3553 Output:
3554 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3555 target hook, if defined.
3556 Return value - the result of the loop-header phi node. */
3558 tree
3559 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3560 tree *realignment_token,
3561 enum dr_alignment_support alignment_support_scheme,
3562 tree init_addr,
3563 struct loop **at_loop)
3565 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3566 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3567 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3568 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3569 struct loop *loop = NULL;
3570 edge pe = NULL;
3571 tree scalar_dest = gimple_assign_lhs (stmt);
3572 tree vec_dest;
3573 gimple inc;
3574 tree ptr;
3575 tree data_ref;
3576 gimple new_stmt;
3577 basic_block new_bb;
3578 tree msq_init = NULL_TREE;
3579 tree new_temp;
3580 gimple phi_stmt;
3581 tree msq = NULL_TREE;
3582 gimple_seq stmts = NULL;
3583 bool inv_p;
3584 bool compute_in_loop = false;
3585 bool nested_in_vect_loop = false;
3586 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3587 struct loop *loop_for_initial_load = NULL;
3589 if (loop_vinfo)
3591 loop = LOOP_VINFO_LOOP (loop_vinfo);
3592 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3595 gcc_assert (alignment_support_scheme == dr_explicit_realign
3596 || alignment_support_scheme == dr_explicit_realign_optimized);
3598 /* We need to generate three things:
3599 1. the misalignment computation
3600 2. the extra vector load (for the optimized realignment scheme).
3601 3. the phi node for the two vectors from which the realignment is
3602 done (for the optimized realignment scheme). */
3604 /* 1. Determine where to generate the misalignment computation.
3606 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3607 calculation will be generated by this function, outside the loop (in the
3608 preheader). Otherwise, INIT_ADDR has already been computed for us by the
3609 caller, inside the loop.
3611 Background: If the misalignment remains fixed throughout the iterations of
3612 the loop, then both realignment schemes are applicable, and also the
3613 misalignment computation can be done outside LOOP. This is because we are
3614 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3615 are a multiple of VS (the Vector Size), and therefore the misalignment in
3616 different vectorized LOOP iterations is always the same.
3617 The problem arises only if the memory access is in an inner-loop nested
3618 inside LOOP, which is now being vectorized using outer-loop vectorization.
3619 This is the only case when the misalignment of the memory access may not
3620 remain fixed throughout the iterations of the inner-loop (as explained in
3621 detail in vect_supportable_dr_alignment). In this case, not only is the
3622 optimized realignment scheme not applicable, but also the misalignment
3623 computation (and generation of the realignment token that is passed to
3624 REALIGN_LOAD) have to be done inside the loop.
3626 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3627 or not, which in turn determines if the misalignment is computed inside
3628 the inner-loop, or outside LOOP. */
3630 if (init_addr != NULL_TREE || !loop_vinfo)
3632 compute_in_loop = true;
3633 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3637 /* 2. Determine where to generate the extra vector load.
3639 For the optimized realignment scheme, instead of generating two vector
3640 loads in each iteration, we generate a single extra vector load in the
3641 preheader of the loop, and in each iteration reuse the result of the
3642 vector load from the previous iteration. In case the memory access is in
3643 an inner-loop nested inside LOOP, which is now being vectorized using
3644 outer-loop vectorization, we need to determine whether this initial vector
3645 load should be generated at the preheader of the inner-loop, or can be
3646 generated at the preheader of LOOP. If the memory access has no evolution
3647 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3648 to be generated inside LOOP (in the preheader of the inner-loop). */
3650 if (nested_in_vect_loop)
3652 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3653 bool invariant_in_outerloop =
3654 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3655 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3657 else
3658 loop_for_initial_load = loop;
3659 if (at_loop)
3660 *at_loop = loop_for_initial_load;
3662 if (loop_for_initial_load)
3663 pe = loop_preheader_edge (loop_for_initial_load);
3665 /* 3. For the case of the optimized realignment, create the first vector
3666 load at the loop preheader. */
3668 if (alignment_support_scheme == dr_explicit_realign_optimized)
3670 /* Create msq_init = *(floor(p1)) in the loop preheader */
3672 gcc_assert (!compute_in_loop);
3673 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3674 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3675 &init_addr, &inc, true, &inv_p);
3676 new_stmt = gimple_build_assign_with_ops
3677 (BIT_AND_EXPR, NULL_TREE, ptr,
3678 build_int_cst (TREE_TYPE (ptr),
3679 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3680 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3681 gimple_assign_set_lhs (new_stmt, new_temp);
3682 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3683 gcc_assert (!new_bb);
3684 data_ref
3685 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3686 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3687 new_stmt = gimple_build_assign (vec_dest, data_ref);
3688 new_temp = make_ssa_name (vec_dest, new_stmt);
3689 gimple_assign_set_lhs (new_stmt, new_temp);
3690 mark_symbols_for_renaming (new_stmt);
3691 if (pe)
3693 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3694 gcc_assert (!new_bb);
3696 else
3697 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3699 msq_init = gimple_assign_lhs (new_stmt);
3702 /* 4. Create realignment token using a target builtin, if available.
3703 It is done either inside the containing loop, or before LOOP (as
3704 determined above). */
3706 if (targetm.vectorize.builtin_mask_for_load)
3708 tree builtin_decl;
3710 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3711 if (!init_addr)
3713 /* Generate the INIT_ADDR computation outside LOOP. */
3714 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3715 NULL_TREE, loop);
3716 if (loop)
3718 pe = loop_preheader_edge (loop);
3719 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3720 gcc_assert (!new_bb);
3722 else
3723 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3726 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3727 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3728 vec_dest =
3729 vect_create_destination_var (scalar_dest,
3730 gimple_call_return_type (new_stmt));
3731 new_temp = make_ssa_name (vec_dest, new_stmt);
3732 gimple_call_set_lhs (new_stmt, new_temp);
3734 if (compute_in_loop)
3735 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3736 else
3738 /* Generate the misalignment computation outside LOOP. */
3739 pe = loop_preheader_edge (loop);
3740 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3741 gcc_assert (!new_bb);
3744 *realignment_token = gimple_call_lhs (new_stmt);
3746 /* The result of the CALL_EXPR to this builtin is determined from
3747 the value of the parameter and no global variables are touched
3748 which makes the builtin a "const" function. Requiring the
3749 builtin to have the "const" attribute makes it unnecessary
3750 to call mark_call_clobbered. */
3751 gcc_assert (TREE_READONLY (builtin_decl));
3754 if (alignment_support_scheme == dr_explicit_realign)
3755 return msq;
3757 gcc_assert (!compute_in_loop);
3758 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3761 /* 5. Create msq = phi <msq_init, lsq> in loop */
3763 pe = loop_preheader_edge (containing_loop);
3764 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3765 msq = make_ssa_name (vec_dest, NULL);
3766 phi_stmt = create_phi_node (msq, containing_loop->header);
3767 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3768 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3770 return msq;
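/* Illustrative sketch, not part of GCC: a scalar model of the realignment
   scheme this function sets up.  MSQ/LSQ are the two aligned vector loads
   surrounding the unaligned address, OFS is the element misalignment that
   the realignment token encodes; all names, and the use of an element
   offset rather than a byte-permute mask, are assumptions of this sketch.

     // result = realign_load (msq, lsq, token): take the last
     // (NUNITS - OFS) elements of MSQ followed by the first OFS
     // elements of LSQ.
     static void
     realign_load (const int *msq, const int *lsq, int ofs,
                   int *out, int nunits)
     {
       for (int i = 0; i < nunits; i++)
         out[i] = (i + ofs < nunits) ? msq[i + ofs] : lsq[i + ofs - nunits];
     }

   For dr_explicit_realign_optimized only one aligned load is issued per
   iteration: the previous iteration's LSQ becomes the next iteration's MSQ,
   which is exactly what the MSQ phi node created above expresses:

     msq = msq_init;                       // prolog load, *(floor(p))
     for (...)
       {
         lsq = load_aligned (p + VS);      // load in loop, *(floor(p'))
         realign_load (msq, lsq, ofs, result, nunits);
         msq = lsq;
         p += VS;
       }
   */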
3774 /* Function vect_strided_load_supported.
3776 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3777 and FALSE otherwise. */
3779 bool
3780 vect_strided_load_supported (tree vectype)
3782 optab perm_even_optab, perm_odd_optab;
3783 enum machine_mode mode;
3785 mode = TYPE_MODE (vectype);
3787 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3788 optab_default);
3789 if (!perm_even_optab)
3791 if (vect_print_dump_info (REPORT_DETAILS))
3792 fprintf (vect_dump, "no optab for perm_even.");
3793 return false;
3796 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3798 if (vect_print_dump_info (REPORT_DETAILS))
3799 fprintf (vect_dump, "perm_even op not supported by target.");
3800 return false;
3803 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3804 optab_default);
3805 if (!perm_odd_optab)
3807 if (vect_print_dump_info (REPORT_DETAILS))
3808 fprintf (vect_dump, "no optab for perm_odd.");
3809 return false;
3812 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3814 if (vect_print_dump_info (REPORT_DETAILS))
3815 fprintf (vect_dump, "perm_odd op not supported by target.");
3816 return false;
3818 return true;
3822 /* Function vect_permute_load_chain.
3824 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3825 a power of 2, generate extract_even/odd stmts to reorder the input data
3826 correctly. Return the final references for loads in RESULT_CHAIN.
3828 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3829 The input is 4 vectors each containing 8 elements. We assign a number to each
3830 element; the input sequence is:
3832 1st vec: 0 1 2 3 4 5 6 7
3833 2nd vec: 8 9 10 11 12 13 14 15
3834 3rd vec: 16 17 18 19 20 21 22 23
3835 4th vec: 24 25 26 27 28 29 30 31
3837 The output sequence should be:
3839 1st vec: 0 4 8 12 16 20 24 28
3840 2nd vec: 1 5 9 13 17 21 25 29
3841 3rd vec: 2 6 10 14 18 22 26 30
3842 4th vec: 3 7 11 15 19 23 27 31
3844 i.e., the first output vector should contain the first elements of each
3845 interleaving group, etc.
3847 We use extract_even/odd instructions to create such output. The input of
3848 each extract_even/odd operation is two vectors
3849 1st vec 2nd vec
3850 0 1 2 3 4 5 6 7
3852 and the output is the vector of extracted even/odd elements. The output of
3853 extract_even will be: 0 2 4 6
3854 and of extract_odd: 1 3 5 7
3857 The permutation is done in log2 (LENGTH) stages. In each stage extract_even
3858 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3859 their order. In our example,
3861 E1: extract_even (1st vec, 2nd vec)
3862 E2: extract_odd (1st vec, 2nd vec)
3863 E3: extract_even (3rd vec, 4th vec)
3864 E4: extract_odd (3rd vec, 4th vec)
3866 The output for the first stage will be:
3868 E1: 0 2 4 6 8 10 12 14
3869 E2: 1 3 5 7 9 11 13 15
3870 E3: 16 18 20 22 24 26 28 30
3871 E4: 17 19 21 23 25 27 29 31
3873 In order to proceed and create the correct sequence for the next stage (or
3874 for the correct output, if the second stage is the last one, as in our
3875 example), we first put the output of extract_even operation and then the
3876 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3877 The input for the second stage is:
3879 1st vec (E1): 0 2 4 6 8 10 12 14
3880 2nd vec (E3): 16 18 20 22 24 26 28 30
3881 3rd vec (E2): 1 3 5 7 9 11 13 15
3882 4th vec (E4): 17 19 21 23 25 27 29 31
3884 The output of the second stage:
3886 E1: 0 4 8 12 16 20 24 28
3887 E2: 2 6 10 14 18 22 26 30
3888 E3: 1 5 9 13 17 21 25 29
3889 E4: 3 7 11 15 19 23 27 31
3891 And RESULT_CHAIN after reordering:
3893 1st vec (E1): 0 4 8 12 16 20 24 28
3894 2nd vec (E3): 1 5 9 13 17 21 25 29
3895 3rd vec (E2): 2 6 10 14 18 22 26 30
3896 4th vec (E4): 3 7 11 15 19 23 27 31. */
3898 bool
3899 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3900 unsigned int length,
3901 gimple stmt,
3902 gimple_stmt_iterator *gsi,
3903 VEC(tree,heap) **result_chain)
3905 tree perm_dest, data_ref, first_vect, second_vect;
3906 gimple perm_stmt;
3907 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3908 int i;
3909 unsigned int j;
3911 /* Check that the operation is supported. */
3912 if (!vect_strided_load_supported (vectype))
3913 return false;
3915 *result_chain = VEC_copy (tree, heap, dr_chain);
3916 for (i = 0; i < exact_log2 (length); i++)
3918 for (j = 0; j < length; j +=2)
3920 first_vect = VEC_index (tree, dr_chain, j);
3921 second_vect = VEC_index (tree, dr_chain, j+1);
3923 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3924 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3925 DECL_GIMPLE_REG_P (perm_dest) = 1;
3926 add_referenced_var (perm_dest);
3928 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3929 perm_dest, first_vect,
3930 second_vect);
3932 data_ref = make_ssa_name (perm_dest, perm_stmt);
3933 gimple_assign_set_lhs (perm_stmt, data_ref);
3934 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3935 mark_symbols_for_renaming (perm_stmt);
3937 VEC_replace (tree, *result_chain, j/2, data_ref);
3939 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3940 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3941 DECL_GIMPLE_REG_P (perm_dest) = 1;
3942 add_referenced_var (perm_dest);
3944 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3945 perm_dest, first_vect,
3946 second_vect);
3947 data_ref = make_ssa_name (perm_dest, perm_stmt);
3948 gimple_assign_set_lhs (perm_stmt, data_ref);
3949 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3950 mark_symbols_for_renaming (perm_stmt);
3952 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3954 dr_chain = VEC_copy (tree, heap, *result_chain);
3956 return true;
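/* Illustrative sketch, not part of the vectorizer: a scalar model of one
   extract_even/extract_odd permutation stage as performed by the loop
   above, for a chain of LENGTH vectors of 8 int elements.  The array
   representation and all helper names are assumptions of this sketch.

     static void
     extract_even (const int *v1, const int *v2, int *out, int nunits)
     {
       for (int i = 0; i < nunits / 2; i++)
         out[i] = v1[2 * i];
       for (int i = 0; i < nunits / 2; i++)
         out[nunits / 2 + i] = v2[2 * i];
     }

     static void
     extract_odd (const int *v1, const int *v2, int *out, int nunits)
     {
       for (int i = 0; i < nunits / 2; i++)
         out[i] = v1[2 * i + 1];
       for (int i = 0; i < nunits / 2; i++)
         out[nunits / 2 + i] = v2[2 * i + 1];
     }

     // One stage: even results go to the first half of the output chain,
     // odd results to the second half, mirroring how RESULT_CHAIN is
     // filled above.
     static void
     permute_stage (int chain[][8], int result[][8], int length)
     {
       for (int j = 0; j < length; j += 2)
         {
           extract_even (chain[j], chain[j + 1], result[j / 2], 8);
           extract_odd (chain[j], chain[j + 1],
                        result[j / 2 + length / 2], 8);
         }
     }

   Copying RESULT back into CHAIN and repeating log2 (LENGTH) times (as the
   function above copies RESULT_CHAIN back into DR_CHAIN between stages)
   turns the interleaved input of the comment above into the de-interleaved
   output {0 4 8 ...}, {1 5 9 ...}, {2 6 10 ...}, {3 7 11 ...}.  */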
3960 /* Function vect_transform_strided_load.
3962 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3963 to perform their permutation and assign the resulting vectorized statements
3964 to the scalar statements. */
3967 bool
3968 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3969 gimple_stmt_iterator *gsi)
3971 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3972 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3973 gimple next_stmt, new_stmt;
3974 VEC(tree,heap) *result_chain = NULL;
3975 unsigned int i, gap_count;
3976 tree tmp_data_ref;
3978 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3979 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3980 vectors, that are ready for vector computation. */
3981 result_chain = VEC_alloc (tree, heap, size);
3982 /* Permute. */
3983 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3984 return false;
3986 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3987 Since we scan the chain starting from its first node, their order
3988 corresponds to the order of data-refs in RESULT_CHAIN.
3989 next_stmt = first_stmt;
3990 gap_count = 1;
3991 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
3993 if (!next_stmt)
3994 break;
3996 /* Skip the gaps. Loads created for the gaps will be removed by the dead
3997 code elimination pass later. No need to check for the first stmt in
3998 the group, since it always exists.
3999 DR_GROUP_GAP is the number of steps in elements from the previous
4000 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4001 correspond to the gaps. */
4002 if (next_stmt != first_stmt
4003 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4005 gap_count++;
4006 continue;
4009 while (next_stmt)
4011 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4012 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4013 copies, and we put the new vector statement in the first available
4014 RELATED_STMT. */
4015 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4016 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4017 else
4019 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4021 gimple prev_stmt =
4022 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4023 gimple rel_stmt =
4024 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4025 while (rel_stmt)
4027 prev_stmt = rel_stmt;
4028 rel_stmt =
4029 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4032 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4033 new_stmt;
4037 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4038 gap_count = 1;
4039 /* If NEXT_STMT accesses the same DR as the previous statement,
4040 put the same TMP_DATA_REF as its vectorized statement; otherwise
4041 get the next data-ref from RESULT_CHAIN. */
4042 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4043 break;
4047 VEC_free (tree, heap, result_chain);
4048 return true;
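/* Illustrative sketch, not part of GCC: the gap-skipping walk performed by
   the loop above, matching permuted results to the statements of the
   interleaving chain.  The struct, its fields and the helper name are
   simplifications assumed only for this sketch; the handling of multiple
   copies (RELATED_STMT) and of statements sharing a data-ref is omitted.

     struct chain_elt
     {
       struct chain_elt *next;  // like DR_GROUP_NEXT_DR
       unsigned int gap;        // like DR_GROUP_GAP: elements since the
                                // previous access (1 means no gap)
       int vec_result;          // like STMT_VINFO_VEC_STMT
     };

     static void
     assign_results (struct chain_elt *first, const int *results,
                     unsigned int n)
     {
       struct chain_elt *s = first;
       unsigned int gap_count = 1;
       for (unsigned int i = 0; i < n && s; i++)
         {
           // Results that fall into a gap have no consumer; skip them.
           if (s != first && gap_count < s->gap)
             {
               gap_count++;
               continue;
             }
           s->vec_result = results[i];
           s = s->next;
           gap_count = 1;
         }
     }
   */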
4051 /* Function vect_can_force_dr_alignment_p.
4053 Returns whether the alignment of DECL can be forced to an ALIGNMENT-bit
4054 boundary. */
4056 bool
4057 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4059 if (TREE_CODE (decl) != VAR_DECL)
4060 return false;
4062 if (DECL_EXTERNAL (decl))
4063 return false;
4065 if (TREE_ASM_WRITTEN (decl))
4066 return false;
4068 if (TREE_STATIC (decl))
4069 return (alignment <= MAX_OFILE_ALIGNMENT);
4070 else
4071 return (alignment <= MAX_STACK_ALIGNMENT);
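/* Illustrative sketch, not part of this function: how a caller is expected
   to use the predicate above when promoting the alignment of a data
   reference's base decl.  The helper name and call site are assumptions of
   this sketch; the DECL_ALIGN/DECL_USER_ALIGN update is the usual way such
   a promotion is recorded on the decl.

     static void
     maybe_force_base_alignment (tree base, unsigned int align_in_bits)
     {
       // Only touch decls we fully control, within the object-file or
       // stack alignment limits checked by the predicate above.
       if (vect_can_force_dr_alignment_p (base, align_in_bits))
         {
           DECL_ALIGN (base) = align_in_bits;
           DECL_USER_ALIGN (base) = 1;
         }
     }
   */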
4075 /* Return whether the data reference DR is supported with respect to its
4076 alignment.
4077 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even if
4078 it is aligned, i.e., check if it is possible to vectorize it with different
4079 alignment. */
4081 enum dr_alignment_support
4082 vect_supportable_dr_alignment (struct data_reference *dr,
4083 bool check_aligned_accesses)
4085 gimple stmt = DR_STMT (dr);
4086 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4087 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4088 enum machine_mode mode = TYPE_MODE (vectype);
4089 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4090 struct loop *vect_loop = NULL;
4091 bool nested_in_vect_loop = false;
4093 if (aligned_access_p (dr) && !check_aligned_accesses)
4094 return dr_aligned;
4096 if (loop_vinfo)
4098 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4099 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4102 /* Possibly unaligned access. */
4104 /* We can choose between using the implicit realignment scheme (generating
4105 a misaligned_move stmt) and the explicit realignment scheme (generating
4106 aligned loads with a REALIGN_LOAD). There are two variants to the
4107 explicit realignment scheme: optimized, and unoptimized.
4108 We can optimize the realignment only if the step between consecutive
4109 vector loads is equal to the vector size. Since the vector memory
4110 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4111 is guaranteed that the misalignment amount remains the same throughout the
4112 execution of the vectorized loop. Therefore, we can create the
4113 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4114 at the loop preheader.
4116 However, in the case of outer-loop vectorization, when vectorizing a
4117 memory access in the inner-loop nested within the LOOP that is now being
4118 vectorized, while it is guaranteed that the misalignment of the
4119 vectorized memory access will remain the same in different outer-loop
4120 iterations, it is *not* guaranteed that it will remain the same throughout
4121 the execution of the inner-loop. This is because the inner-loop advances
4122 with the original scalar step (and not in steps of VS). If the inner-loop
4123 step happens to be a multiple of VS, then the misalignment remains fixed
4124 and we can use the optimized realignment scheme. For example:
4126 for (i=0; i<N; i++)
4127 for (j=0; j<M; j++)
4128 s += a[i+j];
4130 When vectorizing the i-loop in the above example, the step between
4131 consecutive vector loads is 1, and so the misalignment does not remain
4132 fixed across the execution of the inner-loop, and the realignment cannot
4133 be optimized (as illustrated in the following pseudo vectorized loop):
4135 for (i=0; i<N; i+=4)
4136 for (j=0; j<M; j++){
4137 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4138 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4139 // (assuming that we start from an aligned address).
4142 We therefore have to use the unoptimized realignment scheme:
4144 for (i=0; i<N; i+=4)
4145 for (j=k; j<M; j+=4)
4146 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4147 // that the misalignment of the initial address is
4148 // 0).
4150 The loop can then be vectorized as follows:
4152 for (k=0; k<4; k++){
4153 rt = get_realignment_token (&vp[k]);
4154 for (i=0; i<N; i+=4){
4155 v1 = vp[i+k];
4156 for (j=k; j<M; j+=4){
4157 v2 = vp[i+j+VS-1];
4158 va = REALIGN_LOAD <v1,v2,rt>;
4159 vs += va;
4160 v1 = v2;
4163 } */
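/* Illustrative sketch, not part of GCC: a standalone program showing why
   the misalignment is not fixed across the inner loop when the inner-loop
   step is not a multiple of the vector size, as described above.  All
   names and values are local to this sketch.

     #include <stdio.h>

     int
     main (void)
     {
       const int vs = 4;   // elements per vector
       for (int j = 0; j < 8; j++)
         // &vp[i+j] advances by one element per inner iteration, so its
         // misalignment (in elements, modulo VS) cycles 0,1,2,3,0,1,...
         printf ("j=%d misalignment=%d\n", j, j % vs);
       return 0;
     }

   A fixed misalignment (and hence the optimized realignment scheme) would
   require the inner-loop step to be a multiple of VS.  */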
4165 if (DR_IS_READ (dr))
4167 bool is_packed = false;
4168 tree type = (TREE_TYPE (DR_REF (dr)));
4170 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4171 && (!targetm.vectorize.builtin_mask_for_load
4172 || targetm.vectorize.builtin_mask_for_load ()))
4174 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4175 if ((nested_in_vect_loop
4176 && (TREE_INT_CST_LOW (DR_STEP (dr))
4177 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4178 || !loop_vinfo)
4179 return dr_explicit_realign;
4180 else
4181 return dr_explicit_realign_optimized;
4183 if (!known_alignment_for_access_p (dr))
4185 tree ba = DR_BASE_OBJECT (dr);
4187 if (ba)
4188 is_packed = contains_packed_reference (ba);
4191 if (targetm.vectorize.
4192 support_vector_misalignment (mode, type,
4193 DR_MISALIGNMENT (dr), is_packed))
4194 /* Can't software pipeline the loads, but can at least do them. */
4195 return dr_unaligned_supported;
4197 else
4199 bool is_packed = false;
4200 tree type = (TREE_TYPE (DR_REF (dr)));
4202 if (!known_alignment_for_access_p (dr))
4204 tree ba = DR_BASE_OBJECT (dr);
4206 if (ba)
4207 is_packed = contains_packed_reference (ba);
4210 if (targetm.vectorize.
4211 support_vector_misalignment (mode, type,
4212 DR_MISALIGNMENT (dr), is_packed))
4213 return dr_unaligned_supported;
4216 /* Unsupported. */
4217 return dr_unaligned_unsupported;