1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "toplev.h"
43 /* Return the smallest scalar part of STMT.
44 This is used to determine the vectype of the stmt. We generally set the
45 vectype according to the type of the result (lhs). For stmts whose
46 result-type is different than the type of the arguments (e.g., demotion,
47 promotion), vectype will be reset appropriately (later). Note that we have
48 to visit the smallest datatype in this function, because that determines the
49 VF. If the smallest datatype in the loop is present only as the rhs of a
50 promotion operation - we'd miss it.
51 Such a case, where a variable of this datatype does not appear in the lhs
52 anywhere in the loop, can only occur if it's an invariant: e.g.:
53 'int_x = (int) short_inv', which we'd expect to have been optimized away by
54 invariant motion. However, we cannot rely on invariant motion to always take
55 invariants out of the loop, and so in the case of promotion we also have to
56 check the rhs.
57 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
58 types. */
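/* Illustrative sketch (hypothetical loop, not taken from this file): with

     short *s; int *d;
     for (i = 0; i < N; i++)
       d[i] = d[i] + (int) s[i];    # 'short' appears only on the rhs of a cast

   the smallest scalar type is 'short', and it is visible only through the
   rhs of the promotion, so the rhs of casts and widening operations must be
   inspected here as well; deriving the VF from the lhs type 'int' alone
   would miss it.  */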
60 tree
61 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
62 HOST_WIDE_INT *rhs_size_unit)
64 tree scalar_type = gimple_expr_type (stmt);
65 HOST_WIDE_INT lhs, rhs;
67 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
69 if (is_gimple_assign (stmt)
70 && (gimple_assign_cast_p (stmt)
71 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
72 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
74 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
76 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
77 if (rhs < lhs)
78 scalar_type = rhs_type;
81 *lhs_size_unit = lhs;
82 *rhs_size_unit = rhs;
83 return scalar_type;
87 /* Find the place of the data-ref in STMT in the interleaving chain that starts
88 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
90 int
91 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
93 gimple next_stmt = first_stmt;
94 int result = 0;
96 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
97 return -1;
99 while (next_stmt && next_stmt != stmt)
101 result++;
102 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
105 if (next_stmt)
106 return result;
107 else
108 return -1;
112 /* Function vect_insert_into_interleaving_chain.
114 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
116 static void
117 vect_insert_into_interleaving_chain (struct data_reference *dra,
118 struct data_reference *drb)
120 gimple prev, next;
121 tree next_init;
122 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
123 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
125 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
126 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
127 while (next)
129 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
132 /* Insert here. */
133 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
135 return;
137 prev = next;
138 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
141 /* We got to the end of the list. Insert here. */
142 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
147 /* Function vect_update_interleaving_chain.
149 For two data-refs DRA and DRB that are a part of a chain of interleaved data
150 accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.
152 There are four possible cases:
153 1. New stmts - both DRA and DRB are not a part of any chain:
154 FIRST_DR = DRB
155 NEXT_DR (DRB) = DRA
156 2. DRB is a part of a chain and DRA is not:
157 no need to update FIRST_DR
158 no need to insert DRB
159 insert DRA according to init
160 3. DRA is a part of a chain and DRB is not:
161 if (init of FIRST_DR > init of DRB)
162 FIRST_DR = DRB
163 NEXT(FIRST_DR) = previous FIRST_DR
164 else
165 insert DRB according to its init
166 4. both DRA and DRB are in some interleaving chains:
167 choose the chain with the smallest init of FIRST_DR
168 insert the nodes of the second chain into the first one. */
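/* Worked illustration of case 4 (hypothetical 4-byte accesses, for
   exposition only): if DRA's chain is {a[0], a[8]} (inits 0, 32) and DRB's
   chain is {a[4], a[12]} (inits 16, 48), the chain whose FIRST_DR has the
   smaller init wins, and the other chain's nodes are merged into it in
   init order, giving the single chain {a[0], a[4], a[8], a[12]}.  */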
170 static void
171 vect_update_interleaving_chain (struct data_reference *drb,
172 struct data_reference *dra)
174 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
175 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176 tree next_init, init_dra_chain, init_drb_chain;
177 gimple first_a, first_b;
178 tree node_init;
179 gimple node, prev, next, first_stmt;
181 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
182 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
184 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
187 return;
190 /* 2. DRB is a part of a chain and DRA is not. */
191 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
193 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194 /* Insert DRA into the chain of DRB. */
195 vect_insert_into_interleaving_chain (dra, drb);
196 return;
199 /* 3. DRA is a part of a chain and DRB is not. */
200 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
202 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
204 old_first_stmt)));
205 gimple tmp;
207 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
209 /* DRB's init is smaller than the init of the stmt previously marked
210 as the first stmt of the interleaving chain of DRA. Therefore, we
211 update FIRST_STMT and put DRB in the head of the list. */
212 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
215 /* Update all the stmts in the list to point to the new FIRST_STMT. */
216 tmp = old_first_stmt;
217 while (tmp)
219 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
223 else
225 /* Insert DRB in the list of DRA. */
226 vect_insert_into_interleaving_chain (drb, dra);
227 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
229 return;
232 /* 4. both DRA and DRB are in some interleaving chains. */
233 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235 if (first_a == first_b)
236 return;
237 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
240 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
242 /* Insert the nodes of DRA chain into the DRB chain.
243 After inserting a node, continue from this node of the DRB chain (don't
244 start from the beginning).  */
245 node = DR_GROUP_FIRST_DR (stmtinfo_a);
246 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
247 first_stmt = first_b;
249 else
251 /* Insert the nodes of DRB chain into the DRA chain.
252 After inserting a node, continue from this node of the DRA chain (don't
253 start from the beginning).  */
254 node = DR_GROUP_FIRST_DR (stmtinfo_b);
255 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
256 first_stmt = first_a;
259 while (node)
261 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
262 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
263 while (next)
265 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266 if (tree_int_cst_compare (next_init, node_init) > 0)
268 /* Insert here. */
269 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
271 prev = node;
272 break;
274 prev = next;
275 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
277 if (!next)
279 /* We got to the end of the list. Insert here. */
280 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
282 prev = node;
284 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
285 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
290 /* Function vect_equal_offsets.
292 Check if OFFSET1 and OFFSET2 are identical expressions. */
294 static bool
295 vect_equal_offsets (tree offset1, tree offset2)
297 bool res0, res1;
299 STRIP_NOPS (offset1);
300 STRIP_NOPS (offset2);
302 if (offset1 == offset2)
303 return true;
305 if (TREE_CODE (offset1) != TREE_CODE (offset2)
306 || !BINARY_CLASS_P (offset1)
307 || !BINARY_CLASS_P (offset2))
308 return false;
310 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
311 TREE_OPERAND (offset2, 0));
312 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
313 TREE_OPERAND (offset2, 1));
315 return (res0 && res1);
319 /* Function vect_check_interleaving.
321 Check if DRA and DRB are a part of interleaving. In case they are, insert
322 DRA and DRB in an interleaving chain. */
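/* Illustrative sketch (hypothetical loop, not taken from this file): for

     for (i = 0; i < N; i++)
       {
         a[2*i]     = x;     # DRB: DR_INIT = 0, DR_STEP = 8
         a[2*i + 1] = y;     # DRA: DR_INIT = 4, DR_STEP = 8
       }

   with 4-byte elements both refs share base, offset and step; the init
   difference is 4, which is a multiple of the type size and does not exceed
   the step, so the two stores are recorded as one interleaving chain with
   DRB before DRA.  */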
324 static bool
325 vect_check_interleaving (struct data_reference *dra,
326 struct data_reference *drb)
328 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
330 /* Check that the data-refs have the same first location (except init) and that
331 they are both either stores or loads (not a load and a store).  */
332 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
333 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
334 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
335 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
336 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
337 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
338 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
339 || DR_IS_READ (dra) != DR_IS_READ (drb))
340 return false;
342 /* Check:
343 1. data-refs are of the same type
344 2. their steps are equal
345 3. the step (if greater than zero) is greater than the difference between
346 data-refs' inits. */
347 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
348 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
350 if (type_size_a != type_size_b
351 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
352 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
353 TREE_TYPE (DR_REF (drb))))
354 return false;
356 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
357 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
358 step = TREE_INT_CST_LOW (DR_STEP (dra));
360 if (init_a > init_b)
362 /* If init_a == init_b + the size of the type * k, we have an interleaving,
363 and DRB is accessed before DRA. */
364 diff_mod_size = (init_a - init_b) % type_size_a;
366 if (step && (init_a - init_b) > step)
367 return false;
369 if (diff_mod_size == 0)
371 vect_update_interleaving_chain (drb, dra);
372 if (vect_print_dump_info (REPORT_DR_DETAILS))
374 fprintf (vect_dump, "Detected interleaving ");
375 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
376 fprintf (vect_dump, " and ");
377 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
379 return true;
382 else
384 /* If init_b == init_a + the size of the type * k, we have an
385 interleaving, and DRA is accessed before DRB. */
386 diff_mod_size = (init_b - init_a) % type_size_a;
388 if (step && (init_b - init_a) > step)
389 return false;
391 if (diff_mod_size == 0)
393 vect_update_interleaving_chain (dra, drb);
394 if (vect_print_dump_info (REPORT_DR_DETAILS))
396 fprintf (vect_dump, "Detected interleaving ");
397 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
398 fprintf (vect_dump, " and ");
399 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
401 return true;
405 return false;
408 /* Check if the data references pointed to by DR_I and DR_J are the same or
409 belong to the same interleaving group.  Return FALSE if they are
410 different, otherwise return TRUE.  */
412 static bool
413 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
415 gimple stmt_i = DR_STMT (dr_i);
416 gimple stmt_j = DR_STMT (dr_j);
418 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
419 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
420 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
421 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
422 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
423 return true;
424 else
425 return false;
428 /* If address ranges represented by DDR_I and DDR_J are equal,
429 return TRUE, otherwise return FALSE. */
431 static bool
432 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
434 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
435 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
436 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
437 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
438 return true;
439 else
440 return false;
443 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
444 tested at run-time. Return TRUE if DDR was successfully inserted.
445 Return false if versioning is not supported. */
447 static bool
448 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
450 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
452 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
453 return false;
455 if (vect_print_dump_info (REPORT_DR_DETAILS))
457 fprintf (vect_dump, "mark for run-time aliasing test between ");
458 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
459 fprintf (vect_dump, " and ");
460 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
463 if (optimize_loop_nest_for_size_p (loop))
465 if (vect_print_dump_info (REPORT_DR_DETAILS))
466 fprintf (vect_dump, "versioning not supported when optimizing for size.");
467 return false;
470 /* FORNOW: We don't support versioning with outer-loop vectorization. */
471 if (loop->inner)
473 if (vect_print_dump_info (REPORT_DR_DETAILS))
474 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
475 return false;
478 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
479 return true;
483 /* Function vect_analyze_data_ref_dependence.
485 Return TRUE if there (might) exist a dependence between a memory-reference
486 DRA and a memory-reference DRB.  When the dependence can instead be checked
487 at run-time by versioning for alias, return FALSE.  */
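/* Worked illustration (hypothetical loop, not taken from this file): for

     for (i = 0; i < N; i++)
       a[i + 3] = a[i] + 1;     # dependence distance = 3

   with a vectorization factor of 4 the distance is neither a multiple of
   the VF nor >= VF, so the dependence is preserved and vectorization is
   refused; with a VF of 2 the distance (3) is >= VF and the loop can be
   vectorized.  */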
489 static bool
490 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
491 loop_vec_info loop_vinfo)
493 unsigned int i;
494 struct loop *loop = NULL;
495 int vectorization_factor = 0;
496 struct data_reference *dra = DDR_A (ddr);
497 struct data_reference *drb = DDR_B (ddr);
498 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
499 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
500 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
501 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
502 lambda_vector dist_v;
503 unsigned int loop_depth;
505 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
507 /* Independent data accesses. */
508 vect_check_interleaving (dra, drb);
509 return false;
512 if (loop_vinfo)
514 loop = LOOP_VINFO_LOOP (loop_vinfo);
515 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
518 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
519 return false;
521 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
523 if (loop_vinfo)
525 if (vect_print_dump_info (REPORT_DR_DETAILS))
527 fprintf (vect_dump, "versioning for alias required: "
528 "can't determine dependence between ");
529 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
530 fprintf (vect_dump, " and ");
531 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
534 /* Add to list of ddrs that need to be tested at run-time. */
535 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
538 /* When vectorizing a basic block an unknown dependence can still mean
539 a strided access.  */
540 if (vect_check_interleaving (dra, drb))
541 return false;
543 if (vect_print_dump_info (REPORT_DR_DETAILS))
545 fprintf (vect_dump, "can't determine dependence between ");
546 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
547 fprintf (vect_dump, " and ");
548 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
551 return true;
554 /* Versioning for alias is not yet supported for basic block SLP, and
555 dependence distance is inapplicable; hence, in case of a known data
556 dependence, basic block vectorization is impossible for now.  */
557 if (!loop_vinfo)
559 if (dra != drb && vect_check_interleaving (dra, drb))
560 return false;
562 if (vect_print_dump_info (REPORT_DR_DETAILS))
564 fprintf (vect_dump, "determined dependence between ");
565 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
566 fprintf (vect_dump, " and ");
567 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
570 return true;
573 /* Loop-based vectorization and known data dependence. */
574 if (DDR_NUM_DIST_VECTS (ddr) == 0)
576 if (vect_print_dump_info (REPORT_DR_DETAILS))
578 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
579 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
580 fprintf (vect_dump, " and ");
581 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
583 /* Add to list of ddrs that need to be tested at run-time. */
584 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
587 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
588 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
590 int dist = dist_v[loop_depth];
592 if (vect_print_dump_info (REPORT_DR_DETAILS))
593 fprintf (vect_dump, "dependence distance = %d.", dist);
595 /* Same loop iteration. */
596 if (dist % vectorization_factor == 0 && dra_size == drb_size)
598 /* Two references with distance zero have the same alignment. */
599 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
600 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
601 if (vect_print_dump_info (REPORT_ALIGNMENT))
602 fprintf (vect_dump, "accesses have the same alignment.");
603 if (vect_print_dump_info (REPORT_DR_DETAILS))
605 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
606 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
607 fprintf (vect_dump, " and ");
608 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
611 /* For interleaving, mark that there is a read-write dependency if
612 necessary.  We have already checked that one of the data-refs is a store.  */
613 if (DR_IS_READ (dra))
614 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
615 else
617 if (DR_IS_READ (drb))
618 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
621 continue;
624 if (abs (dist) >= vectorization_factor
625 || (dist > 0 && DDR_REVERSED_P (ddr)))
627 /* Dependence distance does not create dependence, as far as
628 vectorization is concerned, in this case. If DDR_REVERSED_P the
629 order of the data-refs in DDR was reversed (to make distance
630 vector positive), and the actual distance is negative. */
631 if (vect_print_dump_info (REPORT_DR_DETAILS))
632 fprintf (vect_dump, "dependence distance >= VF or negative.");
633 continue;
636 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
638 fprintf (vect_dump, "not vectorized, possible dependence "
639 "between data-refs ");
640 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
641 fprintf (vect_dump, " and ");
642 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
645 return true;
648 return false;
651 /* Function vect_analyze_data_ref_dependences.
653 Examine all the data references in the loop, and make sure there do not
654 exist any data dependences between them. */
656 bool
657 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
658 bb_vec_info bb_vinfo)
660 unsigned int i;
661 VEC (ddr_p, heap) *ddrs = NULL;
662 struct data_dependence_relation *ddr;
664 if (vect_print_dump_info (REPORT_DETAILS))
665 fprintf (vect_dump, "=== vect_analyze_dependences ===");
667 if (loop_vinfo)
668 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
669 else
670 ddrs = BB_VINFO_DDRS (bb_vinfo);
672 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
673 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
674 return false;
676 return true;
680 /* Function vect_compute_data_ref_alignment
682 Compute the misalignment of the data reference DR.
684 Output:
685 1. If during the misalignment computation it is found that the data reference
686 cannot be vectorized then false is returned.
687 2. DR_MISALIGNMENT (DR) is defined.
689 FOR NOW: No analysis is actually performed. Misalignment is calculated
690 only for trivial cases. TODO. */
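/* Worked illustration (hypothetical numbers): for an access a[i + 1] with
   4-byte elements and vectype V4SI (16-byte alignment), DR_INIT is 4; if
   the base 'a' is 16-byte aligned, or its alignment can be forced, the
   computed misalignment is 4 % 16 = 4 bytes.  If the base alignment is
   unknown and cannot be forced, DR_MISALIGNMENT stays -1 (unknown).  */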
692 static bool
693 vect_compute_data_ref_alignment (struct data_reference *dr)
695 gimple stmt = DR_STMT (dr);
696 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
698 struct loop *loop = NULL;
699 tree ref = DR_REF (dr);
700 tree vectype;
701 tree base, base_addr;
702 bool base_aligned;
703 tree misalign;
704 tree aligned_to, alignment;
706 if (vect_print_dump_info (REPORT_DETAILS))
707 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
709 if (loop_vinfo)
710 loop = LOOP_VINFO_LOOP (loop_vinfo);
712 /* Initialize misalignment to unknown. */
713 SET_DR_MISALIGNMENT (dr, -1);
715 misalign = DR_INIT (dr);
716 aligned_to = DR_ALIGNED_TO (dr);
717 base_addr = DR_BASE_ADDRESS (dr);
718 vectype = STMT_VINFO_VECTYPE (stmt_info);
720 /* In case the dataref is in an inner-loop of the loop that is being
721 vectorized (LOOP), we use the base and misalignment information
722 relative to the outer-loop (LOOP). This is ok only if the misalignment
723 stays the same throughout the execution of the inner-loop, which is why
724 we have to check that the stride of the dataref in the inner-loop is a
725 multiple of the vector size.  */
726 if (loop && nested_in_vect_loop_p (loop, stmt))
728 tree step = DR_STEP (dr);
729 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
731 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
733 if (vect_print_dump_info (REPORT_ALIGNMENT))
734 fprintf (vect_dump, "inner step divides the vector-size.");
735 misalign = STMT_VINFO_DR_INIT (stmt_info);
736 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
737 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
739 else
741 if (vect_print_dump_info (REPORT_ALIGNMENT))
742 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
743 misalign = NULL_TREE;
747 base = build_fold_indirect_ref (base_addr);
748 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
750 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
751 || !misalign)
753 if (vect_print_dump_info (REPORT_ALIGNMENT))
755 fprintf (vect_dump, "Unknown alignment for access: ");
756 print_generic_expr (vect_dump, base, TDF_SLIM);
758 return true;
761 if ((DECL_P (base)
762 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
763 alignment) >= 0)
764 || (TREE_CODE (base_addr) == SSA_NAME
765 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
766 TREE_TYPE (base_addr)))),
767 alignment) >= 0))
768 base_aligned = true;
769 else
770 base_aligned = false;
772 if (!base_aligned)
774 /* Do not change the alignment of global variables if
775 flag_section_anchors is enabled. */
776 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
777 || (TREE_STATIC (base) && flag_section_anchors))
779 if (vect_print_dump_info (REPORT_DETAILS))
781 fprintf (vect_dump, "can't force alignment of ref: ");
782 print_generic_expr (vect_dump, ref, TDF_SLIM);
784 return true;
787 /* Force the alignment of the decl.
788 NOTE: This is the only change to the code we make during
789 the analysis phase, before deciding to vectorize the loop. */
790 if (vect_print_dump_info (REPORT_DETAILS))
791 fprintf (vect_dump, "force alignment");
792 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
793 DECL_USER_ALIGN (base) = 1;
796 /* At this point we assume that the base is aligned. */
797 gcc_assert (base_aligned
798 || (TREE_CODE (base) == VAR_DECL
799 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
801 /* Modulo alignment. */
802 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
804 if (!host_integerp (misalign, 1))
806 /* Negative or overflowed misalignment value. */
807 if (vect_print_dump_info (REPORT_DETAILS))
808 fprintf (vect_dump, "unexpected misalign value");
809 return false;
812 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
814 if (vect_print_dump_info (REPORT_DETAILS))
816 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
817 print_generic_expr (vect_dump, ref, TDF_SLIM);
820 return true;
824 /* Function vect_compute_data_refs_alignment
826 Compute the misalignment of data references in the loop.
827 Return FALSE if a data reference is found that cannot be vectorized. */
829 static bool
830 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
831 bb_vec_info bb_vinfo)
833 VEC (data_reference_p, heap) *datarefs;
834 struct data_reference *dr;
835 unsigned int i;
837 if (loop_vinfo)
838 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
839 else
840 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
842 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
843 if (!vect_compute_data_ref_alignment (dr))
844 return false;
846 return true;
850 /* Function vect_update_misalignment_for_peel
852 DR - the data reference whose misalignment is to be adjusted.
853 DR_PEEL - the data reference whose misalignment is being made
854 zero in the vector loop by the peel.
855 NPEEL - the number of iterations in the peel loop if the misalignment
856 of DR_PEEL is known at compile time. */
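/* Worked illustration (hypothetical numbers): with 4-byte elements,
   vectype V4SI (16 bytes) and NPEEL = 2, a data-ref whose misalignment
   equals DR_PEEL's is simply set to 0; a data-ref with a known
   misalignment of 4 becomes (4 + 2 * 4) % 16 = 12; a data-ref with
   unknown misalignment stays at -1.  */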
858 static void
859 vect_update_misalignment_for_peel (struct data_reference *dr,
860 struct data_reference *dr_peel, int npeel)
862 unsigned int i;
863 VEC(dr_p,heap) *same_align_drs;
864 struct data_reference *current_dr;
865 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
866 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
867 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
868 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
870 /* For interleaved data accesses the step in the loop must be multiplied by
871 the size of the interleaving group. */
872 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
873 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
874 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
875 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
877 /* It can be assumed that the data refs with the same alignment as dr_peel
878 are aligned in the vector loop. */
879 same_align_drs
880 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
881 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
883 if (current_dr != dr)
884 continue;
885 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
886 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
887 SET_DR_MISALIGNMENT (dr, 0);
888 return;
891 if (known_alignment_for_access_p (dr)
892 && known_alignment_for_access_p (dr_peel))
894 int misal = DR_MISALIGNMENT (dr);
895 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
896 misal += npeel * dr_size;
897 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
898 SET_DR_MISALIGNMENT (dr, misal);
899 return;
902 if (vect_print_dump_info (REPORT_DETAILS))
903 fprintf (vect_dump, "Setting misalignment to -1.");
904 SET_DR_MISALIGNMENT (dr, -1);
908 /* Function vect_verify_datarefs_alignment
910 Return TRUE if all data references in the loop can be
911 handled with respect to alignment. */
913 bool
914 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
916 VEC (data_reference_p, heap) *datarefs;
917 struct data_reference *dr;
918 enum dr_alignment_support supportable_dr_alignment;
919 unsigned int i;
921 if (loop_vinfo)
922 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
923 else
924 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
926 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
928 gimple stmt = DR_STMT (dr);
929 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
931 /* For interleaving, only the alignment of the first access matters. */
932 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
933 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
934 continue;
936 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
937 if (!supportable_dr_alignment)
939 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
941 if (DR_IS_READ (dr))
942 fprintf (vect_dump,
943 "not vectorized: unsupported unaligned load.");
944 else
945 fprintf (vect_dump,
946 "not vectorized: unsupported unaligned store.");
948 return false;
950 if (supportable_dr_alignment != dr_aligned
951 && vect_print_dump_info (REPORT_ALIGNMENT))
952 fprintf (vect_dump, "Vectorizing an unaligned access.");
954 return true;
958 /* Function vector_alignment_reachable_p
960 Return true if vector alignment for DR is reachable by peeling
961 a few loop iterations. Return false otherwise. */
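/* Worked illustration (hypothetical numbers): for vectype V4SI the vector
   holds 4 elements of 4 bytes.  A misalignment of 8 bytes is 2 elements,
   so the prolog would peel 4 - 2 = 2 iterations; for an interleaving
   group of size 2 this is a multiple of the group size and peeling is
   possible, whereas a misalignment of 4 bytes (1 element) would require
   peeling 3 iterations and is rejected.  */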
963 static bool
964 vector_alignment_reachable_p (struct data_reference *dr)
966 gimple stmt = DR_STMT (dr);
967 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
968 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
970 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
972 /* For interleaved accesses we peel only if the number of iterations in
973 the prolog loop (VF - misalignment, counted in elements) is a multiple
974 of the interleaving group size.  */
975 int elem_size, mis_in_elements;
976 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
978 /* FORNOW: handle only known alignment. */
979 if (!known_alignment_for_access_p (dr))
980 return false;
982 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
983 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
985 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
986 return false;
989 /* If the misalignment is known at compile time then allow peeling
990 only if natural alignment is reachable through peeling.  */
991 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
993 HOST_WIDE_INT elmsize =
994 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
995 if (vect_print_dump_info (REPORT_DETAILS))
997 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
998 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1000 if (DR_MISALIGNMENT (dr) % elmsize)
1002 if (vect_print_dump_info (REPORT_DETAILS))
1003 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1004 return false;
1008 if (!known_alignment_for_access_p (dr))
1010 tree type = (TREE_TYPE (DR_REF (dr)));
1011 tree ba = DR_BASE_OBJECT (dr);
1012 bool is_packed = false;
1014 if (ba)
1015 is_packed = contains_packed_reference (ba);
1017 if (vect_print_dump_info (REPORT_DETAILS))
1018 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1019 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1020 return true;
1021 else
1022 return false;
1025 return true;
1028 /* Function vect_enhance_data_refs_alignment
1030 This pass will use loop versioning and loop peeling in order to enhance
1031 the alignment of data references in the loop.
1033 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1034 original loop is to be vectorized; Any other loops that are created by
1035 the transformations performed in this pass - are not supposed to be
1036 vectorized. This restriction will be relaxed.
1038 This pass will require a cost model to guide it whether to apply peeling
1039 or versioning or a combination of the two. For example, the scheme that
1040 Intel uses when given a loop with several memory accesses, is as follows:
1041 choose one memory access ('p') whose alignment you want to force by doing
1042 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1043 other accesses are not necessarily aligned, or (2) use loop versioning to
1044 generate one loop in which all accesses are aligned, and another loop in
1045 which only 'p' is necessarily aligned.
1047 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1048 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1049 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1051 Devising a cost model is the most critical aspect of this work. It will
1052 guide us on which access to peel for, whether to use loop versioning, how
1053 many versions to create, etc. The cost model will probably consist of
1054 generic considerations as well as target specific considerations (on
1055 powerpc for example, misaligned stores are more painful than misaligned
1056 loads).
1058 Here are the general steps involved in alignment enhancements:
1060 -- original loop, before alignment analysis:
1061 for (i=0; i<N; i++){
1062 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1063 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1066 -- After vect_compute_data_refs_alignment:
1067 for (i=0; i<N; i++){
1068 x = q[i]; # DR_MISALIGNMENT(q) = 3
1069 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1072 -- Possibility 1: we do loop versioning:
1073 if (p is aligned) {
1074 for (i=0; i<N; i++){ # loop 1A
1075 x = q[i]; # DR_MISALIGNMENT(q) = 3
1076 p[i] = y; # DR_MISALIGNMENT(p) = 0
1079 else {
1080 for (i=0; i<N; i++){ # loop 1B
1081 x = q[i]; # DR_MISALIGNMENT(q) = 3
1082 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1086 -- Possibility 2: we do loop peeling:
1087 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1088 x = q[i];
1089 p[i] = y;
1091 for (i = 3; i < N; i++){ # loop 2A
1092 x = q[i]; # DR_MISALIGNMENT(q) = 0
1093 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1096 -- Possibility 3: combination of loop peeling and versioning:
1097 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1098 x = q[i];
1099 p[i] = y;
1101 if (p is aligned) {
1102 for (i = 3; i<N; i++){ # loop 3A
1103 x = q[i]; # DR_MISALIGNMENT(q) = 0
1104 p[i] = y; # DR_MISALIGNMENT(p) = 0
1107 else {
1108 for (i = 3; i<N; i++){ # loop 3B
1109 x = q[i]; # DR_MISALIGNMENT(q) = 0
1110 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1114 These loops are later passed to loop_transform to be vectorized. The
1115 vectorizer will use the alignment information to guide the transformation
1116 (whether to generate regular loads/stores, or with special handling for
1117 misalignment). */
1119 bool
1120 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1122 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1124 enum dr_alignment_support supportable_dr_alignment;
1125 struct data_reference *dr0 = NULL;
1126 struct data_reference *dr;
1127 unsigned int i;
1128 bool do_peeling = false;
1129 bool do_versioning = false;
1130 bool stat;
1131 gimple stmt;
1132 stmt_vec_info stmt_info;
1133 int vect_versioning_for_alias_required;
1135 if (vect_print_dump_info (REPORT_DETAILS))
1136 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1138 /* While cost model enhancements are expected in the future, the high level
1139 view of the code at this time is as follows:
1141 A) If there is an unsupported misaligned access then see if peeling
1142 to align this access can make all data references satisfy
1143 vect_supportable_dr_alignment. If so, update data structures
1144 as needed and return true.
1146 B) If peeling wasn't possible and there is a data reference with an
1147 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1148 then see if loop versioning checks can be used to make all data
1149 references satisfy vect_supportable_dr_alignment. If so, update
1150 data structures as needed and return true.
1152 C) If neither peeling nor versioning were successful then return false if
1153 any data reference does not satisfy vect_supportable_dr_alignment.
1155 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1157 Note, Possibility 3 above (which is peeling and versioning together) is not
1158 being done at this time. */
1160 /* (1) Peeling to force alignment. */
1162 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1163 Considerations:
1164 + How many accesses will become aligned due to the peeling
1165 - How many accesses will become unaligned due to the peeling,
1166 and the cost of misaligned accesses.
1167 - The cost of peeling (the extra runtime checks, the increase
1168 in code size).
1170 The scheme we use FORNOW: peel to force the alignment of the first
1171 unsupported misaligned access in the loop.
1173 TODO: Use a cost model. */
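/* Worked illustration of the peeling factor computed below (hypothetical
   numbers): with vectype V4SI, 4-byte elements and DR_MISALIGNMENT (dr0)
   = 8 bytes, the misalignment is 2 elements and npeel = 4 - 2 = 2 scalar
   iterations; if dr0 belongs to an interleaving group of size 2, every
   iteration touches the whole group and npeel is divided down to 1.  */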
1175 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1177 stmt = DR_STMT (dr);
1178 stmt_info = vinfo_for_stmt (stmt);
1179 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1181 /* For interleaving, only the alignment of the first access
1182 matters. */
1183 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1184 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1185 continue;
1187 if (!supportable_dr_alignment)
1189 do_peeling = vector_alignment_reachable_p (dr);
1190 if (do_peeling)
1191 dr0 = dr;
1192 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1193 fprintf (vect_dump, "vector alignment may not be reachable");
1194 break;
1198 vect_versioning_for_alias_required
1199 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1201 /* Temporarily, if versioning for alias is required, we disable peeling
1202 until we support peeling and versioning. Often peeling for alignment
1203 will require peeling for loop-bound, which in turn requires that we
1204 know how to adjust the loop ivs after the loop. */
1205 if (vect_versioning_for_alias_required
1206 || !vect_can_advance_ivs_p (loop_vinfo)
1207 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1208 do_peeling = false;
1210 if (do_peeling)
1212 int mis;
1213 int npeel = 0;
1214 gimple stmt = DR_STMT (dr0);
1215 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1216 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1217 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1219 if (known_alignment_for_access_p (dr0))
1221 /* Since it's known at compile time, compute the number of iterations
1222 in the peeled loop (the peeling factor) for use in updating
1223 DR_MISALIGNMENT values. The peeling factor is the vectorization
1224 factor minus the misalignment as an element count. */
1225 mis = DR_MISALIGNMENT (dr0);
1226 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1227 npeel = nelements - mis;
1229 /* For interleaved data access every iteration accesses all the
1230 members of the group, therefore we divide the number of iterations
1231 by the group size. */
1232 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1233 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1234 npeel /= DR_GROUP_SIZE (stmt_info);
1236 if (vect_print_dump_info (REPORT_DETAILS))
1237 fprintf (vect_dump, "Try peeling by %d", npeel);
1240 /* Ensure that all data refs can be vectorized after the peel. */
1241 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1243 int save_misalignment;
1245 if (dr == dr0)
1246 continue;
1248 stmt = DR_STMT (dr);
1249 stmt_info = vinfo_for_stmt (stmt);
1250 /* For interleaving, only the alignment of the first access
1251 matters. */
1252 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1253 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1254 continue;
1256 save_misalignment = DR_MISALIGNMENT (dr);
1257 vect_update_misalignment_for_peel (dr, dr0, npeel);
1258 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1259 SET_DR_MISALIGNMENT (dr, save_misalignment);
1261 if (!supportable_dr_alignment)
1263 do_peeling = false;
1264 break;
1268 if (do_peeling)
1270 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1271 If the misalignment of DR_i is identical to that of dr0 then set
1272 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1273 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1274 by the peeling factor times the element size of DR_i (MOD the
1275 vectorization factor times the size). Otherwise, the
1276 misalignment of DR_i must be set to unknown. */
1277 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1278 if (dr != dr0)
1279 vect_update_misalignment_for_peel (dr, dr0, npeel);
1281 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1282 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1283 SET_DR_MISALIGNMENT (dr0, 0);
1284 if (vect_print_dump_info (REPORT_ALIGNMENT))
1285 fprintf (vect_dump, "Alignment of access forced using peeling.");
1287 if (vect_print_dump_info (REPORT_DETAILS))
1288 fprintf (vect_dump, "Peeling for alignment will be applied.");
1290 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1291 gcc_assert (stat);
1292 return stat;
1297 /* (2) Versioning to force alignment. */
1299 /* Try versioning if:
1300 1) flag_tree_vect_loop_version is TRUE
1301 2) optimize loop for speed
1302 3) there is at least one unsupported misaligned data ref with an unknown
1303 misalignment, and
1304 4) all misaligned data refs with a known misalignment are supported, and
1305 5) the number of runtime alignment checks is within reason. */
1307 do_versioning =
1308 flag_tree_vect_loop_version
1309 && optimize_loop_nest_for_speed_p (loop)
1310 && (!loop->inner); /* FORNOW */
1312 if (do_versioning)
1314 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1316 stmt = DR_STMT (dr);
1317 stmt_info = vinfo_for_stmt (stmt);
1319 /* For interleaving, only the alignment of the first access
1320 matters. */
1321 if (aligned_access_p (dr)
1322 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1323 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1324 continue;
1326 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1328 if (!supportable_dr_alignment)
1330 gimple stmt;
1331 int mask;
1332 tree vectype;
1334 if (known_alignment_for_access_p (dr)
1335 || VEC_length (gimple,
1336 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1337 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1339 do_versioning = false;
1340 break;
1343 stmt = DR_STMT (dr);
1344 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1345 gcc_assert (vectype);
1347 /* The rightmost bits of an aligned address must be zeros.
1348 Construct the mask needed for this test. For example,
1349 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1350 mask must be 15 = 0xf. */
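/* Illustration (conceptual only; the guard itself is emitted elsewhere):
   with mask = 0xf the versioned loop is guarded by a test that the low
   bits of each possibly misaligned address are zero, roughly
   ((addr_1 | addr_2 | ...) & 0xf) == 0.  */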
1351 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1353 /* FORNOW: use the same mask to test all potentially unaligned
1354 references in the loop. The vectorizer currently supports
1355 a single vector size, see the reference to
1356 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1357 vectorization factor is computed. */
1358 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1359 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1360 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1361 VEC_safe_push (gimple, heap,
1362 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1363 DR_STMT (dr));
1367 /* Versioning requires at least one misaligned data reference. */
1368 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1369 do_versioning = false;
1370 else if (!do_versioning)
1371 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1374 if (do_versioning)
1376 VEC(gimple,heap) *may_misalign_stmts
1377 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1378 gimple stmt;
1380 /* It can now be assumed that the data references in the statements
1381 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1382 of the loop being vectorized. */
1383 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1385 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1386 dr = STMT_VINFO_DATA_REF (stmt_info);
1387 SET_DR_MISALIGNMENT (dr, 0);
1388 if (vect_print_dump_info (REPORT_ALIGNMENT))
1389 fprintf (vect_dump, "Alignment of access forced using versioning.");
1392 if (vect_print_dump_info (REPORT_DETAILS))
1393 fprintf (vect_dump, "Versioning for alignment will be applied.");
1395 /* Peeling and versioning can't be done together at this time. */
1396 gcc_assert (! (do_peeling && do_versioning));
1398 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1399 gcc_assert (stat);
1400 return stat;
1403 /* This point is reached if neither peeling nor versioning is being done. */
1404 gcc_assert (! (do_peeling || do_versioning));
1406 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1407 return stat;
1411 /* Function vect_analyze_data_refs_alignment
1413 Analyze the alignment of the data-references in the loop.
1414 Return FALSE if a data reference is found that cannot be vectorized. */
1416 bool
1417 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1418 bb_vec_info bb_vinfo)
1420 if (vect_print_dump_info (REPORT_DETAILS))
1421 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1423 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1425 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1426 fprintf (vect_dump,
1427 "not vectorized: can't calculate alignment for data ref.");
1428 return false;
1431 return true;
1435 /* Analyze groups of strided accesses: check that DR belongs to a group of
1436 strided accesses of legal size, step, etc. Detect gaps, single element
1437 interleaving, and other special cases. Set strided access info.
1438 Collect groups of strided stores for further use in SLP analysis. */
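/* Illustrative sketch (hypothetical loops, not taken from this file):

     for (i = 0; i < N; i++)
       {
         a[2*i]     = x;    # init 0
         a[2*i + 1] = y;    # init 4, 4-byte elements, step 8
       }

   gives a store group: stride = 8 / 4 = 2, count = 2, count_in_bytes = 8
   equals the step, so there are no gaps and the group of size 2 is
   accepted.  A lone load such as 'x = b[2*i]' with step 8 is accepted as
   single element interleaving with DR_GROUP_SIZE = 2 (a gap of one
   element).  */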
1440 static bool
1441 vect_analyze_group_access (struct data_reference *dr)
1443 tree step = DR_STEP (dr);
1444 tree scalar_type = TREE_TYPE (DR_REF (dr));
1445 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1446 gimple stmt = DR_STMT (dr);
1447 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1448 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1449 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1450 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1451 HOST_WIDE_INT stride;
1452 bool slp_impossible = false;
1454 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1455 interleaving group (including gaps). */
1456 stride = dr_step / type_size;
1458 /* A non-consecutive access is possible only if it is a part of an interleaving group.  */
1459 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1461 /* Check if this DR is a part of interleaving, and is a single
1462 element of the group that is accessed in the loop.  */
1464 /* Gaps are supported only for loads. STEP must be a multiple of the type
1465 size. The size of the group must be a power of 2. */
1466 if (DR_IS_READ (dr)
1467 && (dr_step % type_size) == 0
1468 && stride > 0
1469 && exact_log2 (stride) != -1)
1471 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1472 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1473 if (vect_print_dump_info (REPORT_DR_DETAILS))
1475 fprintf (vect_dump, "Detected single element interleaving ");
1476 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1477 fprintf (vect_dump, " step ");
1478 print_generic_expr (vect_dump, step, TDF_SLIM);
1480 return true;
1482 if (vect_print_dump_info (REPORT_DETAILS))
1483 fprintf (vect_dump, "not consecutive access");
1484 return false;
1487 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1489 /* First stmt in the interleaving chain. Check the chain. */
1490 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1491 struct data_reference *data_ref = dr;
1492 unsigned int count = 1;
1493 tree next_step;
1494 tree prev_init = DR_INIT (data_ref);
1495 gimple prev = stmt;
1496 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
1498 while (next)
1500 /* Skip same data-refs.  In case two or more stmts share a data-ref
1501 (supported only for loads), we vectorize only the first stmt, and
1502 the rest get their vectorized loads from the first one.  */
1503 if (!tree_int_cst_compare (DR_INIT (data_ref),
1504 DR_INIT (STMT_VINFO_DATA_REF (
1505 vinfo_for_stmt (next)))))
1507 if (!DR_IS_READ (data_ref))
1509 if (vect_print_dump_info (REPORT_DETAILS))
1510 fprintf (vect_dump, "Two store stmts share the same dr.");
1511 return false;
1514 /* Check that there are no load-store dependencies for these loads,
1515 to prevent a load-store-load sequence to the same location.  */
1516 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1517 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1519 if (vect_print_dump_info (REPORT_DETAILS))
1520 fprintf (vect_dump,
1521 "READ_WRITE dependence in interleaving.");
1522 return false;
1525 /* For load use the same data-ref load. */
1526 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1528 prev = next;
1529 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1530 continue;
1532 prev = next;
1534 /* Check that all the accesses have the same STEP. */
1535 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1536 if (tree_int_cst_compare (step, next_step))
1538 if (vect_print_dump_info (REPORT_DETAILS))
1539 fprintf (vect_dump, "not consecutive access in interleaving");
1540 return false;
1543 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1544 /* Check that the distance between two accesses is equal to the type
1545 size. Otherwise, we have gaps. */
1546 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1547 - TREE_INT_CST_LOW (prev_init)) / type_size;
1548 if (diff != 1)
1550 /* FORNOW: SLP of accesses with gaps is not supported. */
1551 slp_impossible = true;
1552 if (!DR_IS_READ (data_ref))
1554 if (vect_print_dump_info (REPORT_DETAILS))
1555 fprintf (vect_dump, "interleaved store with gaps");
1556 return false;
1559 gaps += diff - 1;
1562 /* Store the gap from the previous member of the group. If there is no
1563 gap in the access, DR_GROUP_GAP is always 1. */
1564 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1566 prev_init = DR_INIT (data_ref);
1567 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1568 /* Count the number of data-refs in the chain. */
1569 count++;
1572 /* COUNT is the number of accesses found, we multiply it by the size of
1573 the type to get COUNT_IN_BYTES. */
1574 count_in_bytes = type_size * count;
1576 /* Check that the size of the interleaving (including gaps) is not
1577 greater than STEP. */
1578 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
1580 if (vect_print_dump_info (REPORT_DETAILS))
1582 fprintf (vect_dump, "interleaving size is greater than step for ");
1583 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1585 return false;
1588 /* Check that the size of the interleaving is equal to STEP for stores,
1589 i.e., that there are no gaps. */
1590 if (dr_step && dr_step != count_in_bytes)
1592 if (DR_IS_READ (dr))
1594 slp_impossible = true;
1595 /* There is a gap after the last load in the group. This gap is a
1596 difference between the stride and the number of elements. When
1597 there is no gap, this difference should be 0. */
1598 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1600 else
1602 if (vect_print_dump_info (REPORT_DETAILS))
1603 fprintf (vect_dump, "interleaved store with gaps");
1604 return false;
1608 /* Check that STEP is a multiple of type size. */
1609 if (dr_step && (dr_step % type_size) != 0)
1611 if (vect_print_dump_info (REPORT_DETAILS))
1613 fprintf (vect_dump, "step is not a multiple of type size: step ");
1614 print_generic_expr (vect_dump, step, TDF_SLIM);
1615 fprintf (vect_dump, " size ");
1616 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1617 TDF_SLIM);
1619 return false;
1622 /* FORNOW: we handle only interleaving that is a power of 2.
1623 We don't fail here if it may be still possible to vectorize the
1624 group using SLP. If not, the size of the group will be checked in
1625 vect_analyze_operations, and the vectorization will fail. */
1626 if (exact_log2 (stride) == -1)
1628 if (vect_print_dump_info (REPORT_DETAILS))
1629 fprintf (vect_dump, "interleaving is not a power of 2");
1631 if (slp_impossible)
1632 return false;
1635 if (stride == 0)
1636 stride = count;
1638 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1639 if (vect_print_dump_info (REPORT_DETAILS))
1640 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1642 /* SLP: create an SLP data structure for every interleaving group of
1643 stores for further analysis in vect_analyze_slp.  */
1644 if (!DR_IS_READ (dr) && !slp_impossible)
1646 if (loop_vinfo)
1647 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
1648 stmt);
1649 if (bb_vinfo)
1650 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
1651 stmt);
1655 return true;
1659 /* Analyze the access pattern of the data-reference DR.
1660 In case of non-consecutive accesses call vect_analyze_group_access() to
1661 analyze groups of strided accesses. */
1663 static bool
1664 vect_analyze_data_ref_access (struct data_reference *dr)
1666 tree step = DR_STEP (dr);
1667 tree scalar_type = TREE_TYPE (DR_REF (dr));
1668 gimple stmt = DR_STMT (dr);
1669 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1670 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1671 struct loop *loop = NULL;
1672 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1674 if (loop_vinfo)
1675 loop = LOOP_VINFO_LOOP (loop_vinfo);
1677 if (loop_vinfo && !step)
1679 if (vect_print_dump_info (REPORT_DETAILS))
1680 fprintf (vect_dump, "bad data-ref access in loop");
1681 return false;
1684 /* Don't allow invariant accesses in loops. */
1685 if (loop_vinfo && dr_step == 0)
1686 return false;
1688 if (loop && nested_in_vect_loop_p (loop, stmt))
1690 /* Interleaved accesses are not yet supported within outer-loop
1691 vectorization for references in the inner-loop. */
1692 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1694 /* For the rest of the analysis we use the outer-loop step. */
1695 step = STMT_VINFO_DR_STEP (stmt_info);
1696 dr_step = TREE_INT_CST_LOW (step);
1698 if (dr_step == 0)
1700 if (vect_print_dump_info (REPORT_ALIGNMENT))
1701 fprintf (vect_dump, "zero step in outer loop.");
1702 if (DR_IS_READ (dr))
1703 return true;
1704 else
1705 return false;
1709 /* Consecutive? */
1710 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1712 /* Mark that it is not interleaving. */
1713 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1714 return true;
1717 if (loop && nested_in_vect_loop_p (loop, stmt))
1719 if (vect_print_dump_info (REPORT_ALIGNMENT))
1720 fprintf (vect_dump, "strided access in outer loop.");
1721 return false;
1724 /* Non-consecutive access - check if it is a part of an interleaving group.  */
1725 return vect_analyze_group_access (dr);
1729 /* Function vect_analyze_data_ref_accesses.
1731 Analyze the access pattern of all the data references in the loop.
1733 FORNOW: the only access pattern that is considered vectorizable is a
1734 simple step 1 (consecutive) access.
1736 FORNOW: handle only arrays and pointer accesses. */
1738 bool
1739 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1741 unsigned int i;
1742 VEC (data_reference_p, heap) *datarefs;
1743 struct data_reference *dr;
1745 if (vect_print_dump_info (REPORT_DETAILS))
1746 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1748 if (loop_vinfo)
1749 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1750 else
1751 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1753 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1754 if (!vect_analyze_data_ref_access (dr))
1756 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1757 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1758 return false;
1761 return true;
1764 /* Function vect_prune_runtime_alias_test_list.
1766 Prune a list of ddrs to be tested at run-time by versioning for alias.
1767 Return FALSE if the resulting list of ddrs is longer than allowed by
1768 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
1770 bool
1771 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1773 VEC (ddr_p, heap) * ddrs =
1774 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1775 unsigned i, j;
1777 if (vect_print_dump_info (REPORT_DETAILS))
1778 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1780 for (i = 0; i < VEC_length (ddr_p, ddrs); )
1782 bool found;
1783 ddr_p ddr_i;
1785 ddr_i = VEC_index (ddr_p, ddrs, i);
1786 found = false;
1788 for (j = 0; j < i; j++)
1790 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1792 if (vect_vfa_range_equal (ddr_i, ddr_j))
1794 if (vect_print_dump_info (REPORT_DR_DETAILS))
1796 fprintf (vect_dump, "found equal ranges ");
1797 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1798 fprintf (vect_dump, ", ");
1799 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1800 fprintf (vect_dump, " and ");
1801 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1802 fprintf (vect_dump, ", ");
1803 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1805 found = true;
1806 break;
1810 if (found)
1812 VEC_ordered_remove (ddr_p, ddrs, i);
1813 continue;
1815 i++;
1818 if (VEC_length (ddr_p, ddrs) >
1819 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1821 if (vect_print_dump_info (REPORT_DR_DETAILS))
1823 fprintf (vect_dump,
1824 "disable versioning for alias - max number of generated "
1825 "checks exceeded.");
1828 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1830 return false;
1833 return true;
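/* Editorial sketch, not part of GCC: each DDR that survives the pruning above
   is later turned into a run-time overlap test guarding the vectorized loop
   version.  The shape of that test is roughly the following; the function is
   hypothetical and the pointer comparison is simplified (the real check is
   built from the DR base addresses and segment lengths).  */
void
example_versioning_for_alias (int *a, int *b, int n)
{
  int i;
  if (a + n <= b || b + n <= a)
    {
      /* Vectorized version: the two accesses provably do not overlap.  */
      for (i = 0; i < n; i++)
        a[i] = b[i] + 1;
    }
  else
    {
      /* Scalar fallback version.  */
      for (i = 0; i < n; i++)
        a[i] = b[i] + 1;
    }
}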
1837 /* Function vect_analyze_data_refs.
1839 Find all the data references in the loop or basic block.
1841 The general structure of the analysis of data refs in the vectorizer is as
1842 follows:
1843 1- vect_analyze_data_refs(loop/bb): call
1844 compute_data_dependences_for_loop/bb to find and analyze all data-refs
1845 in the loop/bb and their dependences.
1846 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1847 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1848 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1852 bool
1853 vect_analyze_data_refs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1855 struct loop *loop = NULL;
1856 basic_block bb = NULL;
1857 unsigned int i;
1858 VEC (data_reference_p, heap) *datarefs;
1859 struct data_reference *dr;
1860 tree scalar_type;
1862 if (vect_print_dump_info (REPORT_DETAILS))
1863 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1865 if (loop_vinfo)
1867 loop = LOOP_VINFO_LOOP (loop_vinfo);
1868 compute_data_dependences_for_loop (loop, true,
1869 &LOOP_VINFO_DATAREFS (loop_vinfo),
1870 &LOOP_VINFO_DDRS (loop_vinfo));
1871 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1873 else
1875 bb = BB_VINFO_BB (bb_vinfo);
1876 compute_data_dependences_for_bb (bb, true,
1877 &BB_VINFO_DATAREFS (bb_vinfo),
1878 &BB_VINFO_DDRS (bb_vinfo));
1879 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1882 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1883 from stmt_vec_info struct to DR and vectype. */
1885 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1887 gimple stmt;
1888 stmt_vec_info stmt_info;
1889 basic_block bb;
1890 tree base, offset, init;
1892 if (!dr || !DR_REF (dr))
1894 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1895 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1896 return false;
1899 stmt = DR_STMT (dr);
1900 stmt_info = vinfo_for_stmt (stmt);
1902 /* Check that analysis of the data-ref succeeded. */
1903 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1904 || !DR_STEP (dr))
1906 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1908 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1909 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1911 return false;
1914 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1916 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1917 fprintf (vect_dump, "not vectorized: base addr of dr is a "
1918 "constant");
1919 return false;
1922 base = unshare_expr (DR_BASE_ADDRESS (dr));
1923 offset = unshare_expr (DR_OFFSET (dr));
1924 init = unshare_expr (DR_INIT (dr));
1926 /* Update DR field in stmt_vec_info struct. */
1927 bb = gimple_bb (stmt);
1929 /* If the dataref is in an inner-loop of the loop that is considered for
1930 vectorization, we also want to analyze the access relative to
1931 the outer-loop (DR contains information only relative to the
1932 inner-most enclosing loop). We do that by building a reference to the
1933 first location accessed by the inner-loop, and analyze it relative to
1934 the outer-loop. */
1935 if (loop && nested_in_vect_loop_p (loop, stmt))
1937 tree outer_step, outer_base, outer_init;
1938 HOST_WIDE_INT pbitsize, pbitpos;
1939 tree poffset;
1940 enum machine_mode pmode;
1941 int punsignedp, pvolatilep;
1942 affine_iv base_iv, offset_iv;
1943 tree dinit;
1945 /* Build a reference to the first location accessed by the
1946 inner-loop: *(BASE+INIT). (The first location is actually
1947 BASE+INIT+OFFSET, but we add OFFSET separately later). */
1948 tree inner_base = build_fold_indirect_ref
1949 (fold_build2 (POINTER_PLUS_EXPR,
1950 TREE_TYPE (base), base,
1951 fold_convert (sizetype, init)));
1953 if (vect_print_dump_info (REPORT_DETAILS))
1955 fprintf (vect_dump, "analyze in outer-loop: ");
1956 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
1959 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
1960 &poffset, &pmode, &punsignedp, &pvolatilep, false);
1961 gcc_assert (outer_base != NULL_TREE);
1963 if (pbitpos % BITS_PER_UNIT != 0)
1965 if (vect_print_dump_info (REPORT_DETAILS))
1966 fprintf (vect_dump, "failed: bit offset alignment.\n");
1967 return false;
1970 outer_base = build_fold_addr_expr (outer_base);
1971 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
1972 &base_iv, false))
1974 if (vect_print_dump_info (REPORT_DETAILS))
1975 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
1976 return false;
1979 if (offset)
1981 if (poffset)
1982 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
1983 poffset);
1984 else
1985 poffset = offset;
1988 if (!poffset)
1990 offset_iv.base = ssize_int (0);
1991 offset_iv.step = ssize_int (0);
1993 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
1994 &offset_iv, false))
1996 if (vect_print_dump_info (REPORT_DETAILS))
1997 fprintf (vect_dump, "evolution of offset is not affine.\n");
1998 return false;
2001 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2002 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2003 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2004 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2005 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2007 outer_step = size_binop (PLUS_EXPR,
2008 fold_convert (ssizetype, base_iv.step),
2009 fold_convert (ssizetype, offset_iv.step));
2011 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2012 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2013 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2014 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2015 STMT_VINFO_DR_OFFSET (stmt_info) =
2016 fold_convert (ssizetype, offset_iv.base);
2017 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2018 size_int (highest_pow2_factor (offset_iv.base));
2020 if (vect_print_dump_info (REPORT_DETAILS))
2022 fprintf (vect_dump, "\touter base_address: ");
2023 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2024 fprintf (vect_dump, "\n\touter offset from base address: ");
2025 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2026 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2027 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2028 fprintf (vect_dump, "\n\touter step: ");
2029 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2030 fprintf (vect_dump, "\n\touter aligned to: ");
2031 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2035 if (STMT_VINFO_DATA_REF (stmt_info))
2037 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2039 fprintf (vect_dump,
2040 "not vectorized: more than one data ref in stmt: ");
2041 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2043 return false;
2046 STMT_VINFO_DATA_REF (stmt_info) = dr;
2048 /* Set vectype for STMT. */
2049 scalar_type = TREE_TYPE (DR_REF (dr));
2050 STMT_VINFO_VECTYPE (stmt_info) =
2051 get_vectype_for_scalar_type (scalar_type);
2052 if (!STMT_VINFO_VECTYPE (stmt_info))
2054 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2056 fprintf (vect_dump,
2057 "not vectorized: no vectype for stmt: ");
2058 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2059 fprintf (vect_dump, " scalar_type: ");
2060 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2062 return false;
2066 return true;
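/* Editorial illustration, not part of GCC: the kind of nested access handled
   by the outer-loop analysis above.  'in' points to short (2 bytes); the
   names are hypothetical.  DR_* describes the reference relative to the inner
   j-loop (base &in[i], step 2B), while the STMT_VINFO_DR_* fields just
   computed describe the first location accessed by the inner loop relative
   to the outer i-loop (base &in[0], init 0, step 2B).  */
void
example_outer_loop_dataref (short *in, int *s, int n, int m)
{
  int i, j;
  for (i = 0; i < n; i++)        /* outer loop being vectorized          */
    for (j = 0; j < m; j++)      /* inner loop containing the data-ref   */
      *s += in[i + j];
}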
2070 /* Function vect_get_new_vect_var.
2072 Returns a new temporary variable. The current naming scheme prepends the
2073 prefix "vect_", "vect_p" or "stmp_" (depending on the value of VAR_KIND)
2074 to NAME if NAME is provided, and otherwise uses the prefix alone as the
2075 variable name. */
2077 tree
2078 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2080 const char *prefix;
2081 tree new_vect_var;
2083 switch (var_kind)
2085 case vect_simple_var:
2086 prefix = "vect_";
2087 break;
2088 case vect_scalar_var:
2089 prefix = "stmp_";
2090 break;
2091 case vect_pointer_var:
2092 prefix = "vect_p";
2093 break;
2094 default:
2095 gcc_unreachable ();
2098 if (name)
2100 char* tmp = concat (prefix, name, NULL);
2101 new_vect_var = create_tmp_var (type, tmp);
2102 free (tmp);
2104 else
2105 new_vect_var = create_tmp_var (type, prefix);
2107 /* Mark vector typed variable as a gimple register variable. */
2108 if (TREE_CODE (type) == VECTOR_TYPE)
2109 DECL_GIMPLE_REG_P (new_vect_var) = true;
2111 return new_vect_var;
2115 /* Function vect_create_addr_base_for_vector_ref.
2117 Create an expression that computes the address of the first memory location
2118 that will be accessed for a data reference.
2120 Input:
2121 STMT: The statement containing the data reference.
2122 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2123 OFFSET: Optional. If supplied, it is added to the initial address.
2124 LOOP: Specify relative to which loop-nest should the address be computed.
2125 For example, when the dataref is in an inner-loop nested in an
2126 outer-loop that is now being vectorized, LOOP can be either the
2127 outer-loop, or the inner-loop. The first memory location accessed
2128 by the following dataref ('in' points to short):
2130 for (i=0; i<N; i++)
2131 for (j=0; j<M; j++)
2132 s += in[i+j]
2134 is as follows:
2135 if LOOP=i_loop: &in (relative to i_loop)
2136 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2138 Output:
2139 1. Return an SSA_NAME whose value is the address of the memory location of
2140 the first vector of the data reference.
2141 2. If NEW_STMT_LIST is not NULL_TREE on return, then the caller must insert
2142 the statement(s) which define the returned SSA_NAME.
2144 FORNOW: We are only handling array accesses with step 1. */
2146 tree
2147 vect_create_addr_base_for_vector_ref (gimple stmt,
2148 gimple_seq *new_stmt_list,
2149 tree offset,
2150 struct loop *loop)
2152 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2153 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2154 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2155 tree base_name;
2156 tree data_ref_base_var;
2157 tree vec_stmt;
2158 tree addr_base, addr_expr;
2159 tree dest;
2160 gimple_seq seq = NULL;
2161 tree base_offset = unshare_expr (DR_OFFSET (dr));
2162 tree init = unshare_expr (DR_INIT (dr));
2163 tree vect_ptr_type;
2164 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2165 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2167 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2169 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2171 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2173 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2174 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2175 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2178 if (loop_vinfo)
2179 base_name = build_fold_indirect_ref (data_ref_base);
2180 else
2182 base_offset = ssize_int (0);
2183 init = ssize_int (0);
2184 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2187 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2188 add_referenced_var (data_ref_base_var);
2189 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2190 data_ref_base_var);
2191 gimple_seq_add_seq (new_stmt_list, seq);
2193 /* Create base_offset */
2194 base_offset = size_binop (PLUS_EXPR,
2195 fold_convert (sizetype, base_offset),
2196 fold_convert (sizetype, init));
2197 dest = create_tmp_var (sizetype, "base_off");
2198 add_referenced_var (dest);
2199 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2200 gimple_seq_add_seq (new_stmt_list, seq);
2202 if (offset)
2204 tree tmp = create_tmp_var (sizetype, "offset");
2206 add_referenced_var (tmp);
2207 offset = fold_build2 (MULT_EXPR, sizetype,
2208 fold_convert (sizetype, offset), step);
2209 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2210 base_offset, offset);
2211 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2212 gimple_seq_add_seq (new_stmt_list, seq);
2215 /* base + base_offset */
2216 if (loop_vinfo)
2217 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2218 data_ref_base, base_offset);
2219 else
2221 if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF)
2222 addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0));
2223 else
2224 addr_base = build1 (ADDR_EXPR,
2225 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2226 unshare_expr (DR_REF (dr)));
2229 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2231 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2232 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2233 get_name (base_name));
2234 add_referenced_var (addr_expr);
2235 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2236 gimple_seq_add_seq (new_stmt_list, seq);
2238 if (vect_print_dump_info (REPORT_DETAILS))
2240 fprintf (vect_dump, "created ");
2241 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2244 return vec_stmt;
2248 /* Function vect_create_data_ref_ptr.
2250 Create a new pointer to vector type (vp), that points to the first location
2251 accessed in the loop by STMT, along with the def-use update chain to
2252 appropriately advance the pointer through the loop iterations. Also set
2253 aliasing information for the pointer. This vector pointer is used by the
2254 callers to this function to create a memory reference expression for vector
2255 load/store access.
2257 Input:
2258 1. STMT: a stmt that references memory. Expected to be of the form
2259 GIMPLE_ASSIGN <name, data-ref> or
2260 GIMPLE_ASSIGN <data-ref, name>.
2261 2. AT_LOOP: the loop where the vector memref is to be created.
2262 3. OFFSET (optional): an offset to be added to the initial address accessed
2263 by the data-ref in STMT.
2264 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2265 pointing to the initial address.
2266 5. TYPE: if not NULL indicates the required type of the data-ref.
2268 Output:
2269 1. Declare a new ptr to vector_type, and have it point to the base of the
2270 data reference (the initial address accessed by the data reference).
2271 For example, for vector of type V8HI, the following code is generated:
2273 v8hi *vp;
2274 vp = (v8hi *)initial_address;
2276 if OFFSET is not supplied:
2277 initial_address = &a[init];
2278 if OFFSET is supplied:
2279 initial_address = &a[init + OFFSET];
2281 Return the initial_address in INITIAL_ADDRESS.
2283 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2284 update the pointer in each iteration of the loop.
2286 Return the increment stmt that updates the pointer in PTR_INCR.
2288 3. Set INV_P to true if the access pattern of the data reference in the
2289 vectorized loop is invariant. Set it to false otherwise.
2291 4. Return the pointer. */
2293 tree
2294 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2295 tree offset, tree *initial_address, gimple *ptr_incr,
2296 bool only_init, bool *inv_p)
2298 tree base_name;
2299 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2300 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2301 struct loop *loop = NULL;
2302 bool nested_in_vect_loop = false;
2303 struct loop *containing_loop = NULL;
2304 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2305 tree vect_ptr_type;
2306 tree vect_ptr;
2307 tree new_temp;
2308 gimple vec_stmt;
2309 gimple_seq new_stmt_list = NULL;
2310 edge pe = NULL;
2311 basic_block new_bb;
2312 tree vect_ptr_init;
2313 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2314 tree vptr;
2315 gimple_stmt_iterator incr_gsi;
2316 bool insert_after;
2317 tree indx_before_incr, indx_after_incr;
2318 gimple incr;
2319 tree step;
2320 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2321 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2323 if (loop_vinfo)
2325 loop = LOOP_VINFO_LOOP (loop_vinfo);
2326 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2327 containing_loop = (gimple_bb (stmt))->loop_father;
2328 pe = loop_preheader_edge (loop);
2330 else
2332 gcc_assert (bb_vinfo);
2333 only_init = true;
2334 *ptr_incr = NULL;
2337 /* Check the step (evolution) of the load in LOOP, and record
2338 whether it's invariant. */
2339 if (nested_in_vect_loop)
2340 step = STMT_VINFO_DR_STEP (stmt_info);
2341 else
2342 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2344 if (tree_int_cst_compare (step, size_zero_node) == 0)
2345 *inv_p = true;
2346 else
2347 *inv_p = false;
2349 /* Create an expression for the first address accessed by this load
2350 in LOOP. */
2351 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2353 if (vect_print_dump_info (REPORT_DETAILS))
2355 tree data_ref_base = base_name;
2356 fprintf (vect_dump, "create vector-pointer variable to type: ");
2357 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2358 if (TREE_CODE (data_ref_base) == VAR_DECL
2359 || TREE_CODE (data_ref_base) == ARRAY_REF)
2360 fprintf (vect_dump, " vectorizing an array ref: ");
2361 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2362 fprintf (vect_dump, " vectorizing a record based array ref: ");
2363 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2364 fprintf (vect_dump, " vectorizing a pointer ref: ");
2365 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2368 /** (1) Create the new vector-pointer variable: **/
2369 vect_ptr_type = build_pointer_type (vectype);
2370 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2371 get_name (base_name));
2372 /* If any of the data-references in the stmt group does not conflict
2373 with the created vector data-reference use a ref-all pointer instead. */
2374 if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2376 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2379 tree lhs = gimple_assign_lhs (orig_stmt);
2380 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2381 get_alias_set (lhs)))
2383 vect_ptr_type = build_pointer_type_for_mode (vectype,
2384 ptr_mode, true);
2385 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2386 get_name (base_name));
2387 break;
2390 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2392 while (orig_stmt);
2395 add_referenced_var (vect_ptr);
2397 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2398 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2399 def-use update cycles for the pointer: One relative to the outer-loop
2400 (LOOP), which is what steps (3) and (4) below do. The other is relative
2401 to the inner-loop (which is the inner-most loop containing the dataref),
2402 and this is done by step (5) below.
2404 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2405 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2406 redundant. Steps (3),(4) create the following:
2408 vp0 = &base_addr;
2409 LOOP: vp1 = phi(vp0,vp2)
2410 ...
2412 vp2 = vp1 + step
2413 goto LOOP
2415 If there is an inner-loop nested in loop, then step (5) will also be
2416 applied, and an additional update in the inner-loop will be created:
2418 vp0 = &base_addr;
2419 LOOP: vp1 = phi(vp0,vp2)
2421 inner: vp3 = phi(vp1,vp4)
2422 vp4 = vp3 + inner_step
2423 if () goto inner
2425 vp2 = vp1 + step
2426 if () goto LOOP */
2428 /** (3) Calculate the initial address of the vector-pointer, and set
2429 the vector-pointer to point to it before the loop: **/
2431 /* Create: &(base[init_val+offset]) in the loop preheader. */
2433 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2434 offset, loop);
2435 if (new_stmt_list)
2437 if (pe)
2439 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2440 gcc_assert (!new_bb);
2442 else
2443 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
2446 *initial_address = new_temp;
2448 /* Create: p = (vectype *) initial_base */
2449 vec_stmt = gimple_build_assign (vect_ptr,
2450 fold_convert (vect_ptr_type, new_temp));
2451 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2452 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2453 if (pe)
2455 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2456 gcc_assert (!new_bb);
2458 else
2459 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
2461 /** (4) Handle the updating of the vector-pointer inside the loop.
2462 This is needed when ONLY_INIT is false, and also when AT_LOOP
2463 is the inner-loop nested in LOOP (during outer-loop vectorization).
2466 /* No update in loop is required. */
2467 if (only_init && (!loop_vinfo || at_loop == loop))
2469 /* Copy the points-to information if it exists. */
2470 if (DR_PTR_INFO (dr))
2471 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2472 vptr = vect_ptr_init;
2474 else
2476 /* The step of the vector pointer is the Vector Size. */
2477 tree step = TYPE_SIZE_UNIT (vectype);
2478 /* One exception to the above is when the scalar step of the load in
2479 LOOP is zero. In this case the step here is also zero. */
2480 if (*inv_p)
2481 step = size_zero_node;
2483 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2485 create_iv (vect_ptr_init,
2486 fold_convert (vect_ptr_type, step),
2487 vect_ptr, loop, &incr_gsi, insert_after,
2488 &indx_before_incr, &indx_after_incr);
2489 incr = gsi_stmt (incr_gsi);
2490 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2492 /* Copy the points-to information if it exists. */
2493 if (DR_PTR_INFO (dr))
2495 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2496 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2498 if (ptr_incr)
2499 *ptr_incr = incr;
2501 vptr = indx_before_incr;
2504 if (!nested_in_vect_loop || only_init)
2505 return vptr;
2508 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2509 nested in LOOP, if it exists: **/
2511 gcc_assert (nested_in_vect_loop);
2512 if (!only_init)
2514 standard_iv_increment_position (containing_loop, &incr_gsi,
2515 &insert_after);
2516 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2517 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2518 &indx_after_incr);
2519 incr = gsi_stmt (incr_gsi);
2520 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2522 /* Copy the points-to information if it exists. */
2523 if (DR_PTR_INFO (dr))
2525 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2526 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2528 if (ptr_incr)
2529 *ptr_incr = incr;
2531 return indx_before_incr;
2533 else
2534 gcc_unreachable ();
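/* Editorial sketch, not part of GCC: the shape of the def-use cycle created
   by steps (3)/(4) above for a unit-stride access, written with GCC's generic
   vector extension.  All example_* names are hypothetical, a 16-byte vector
   is assumed, and A and B are assumed suitably aligned with N a multiple
   of 4.  */
typedef int example_v4si __attribute__ ((vector_size (16)));

void
example_vect_ptr_iv (int *a, int *b, int n)
{
  example_v4si *vpa = (example_v4si *) a;   /* vp0 = &base_addr (preheader)  */
  example_v4si *vpb = (example_v4si *) b;
  int i;
  for (i = 0; i < n / 4; i++)
    {
      *vpa = *vpb;    /* vector access through the created pointer            */
      vpb = vpb + 1;  /* vp2 = vp1 + step, step == TYPE_SIZE_UNIT (vectype)   */
      vpa = vpa + 1;
    }
}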
2538 /* Function bump_vector_ptr
2540 Increment a pointer (to a vector type) by vector-size. If requested,
2541 i.e. if PTR-INCR is given, then also connect the new increment stmt
2542 to the existing def-use update-chain of the pointer, by modifying
2543 the PTR_INCR as illustrated below:
2545 The pointer def-use update-chain before this function:
2546 DATAREF_PTR = phi (p_0, p_2)
2547 ....
2548 PTR_INCR: p_2 = DATAREF_PTR + step
2550 The pointer def-use update-chain after this function:
2551 DATAREF_PTR = phi (p_0, p_2)
2552 ....
2553 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2554 ....
2555 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2557 Input:
2558 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2559 in the loop.
2560 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2561 the loop. The increment amount across iterations is expected
2562 to be vector_size.
2563 BSI - location where the new update stmt is to be placed.
2564 STMT - the original scalar memory-access stmt that is being vectorized.
2565 BUMP - optional. The offset by which to bump the pointer. If not given,
2566 the offset is assumed to be vector_size.
2568 Output: Return NEW_DATAREF_PTR as illustrated above.
2572 tree
2573 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2574 gimple stmt, tree bump)
2576 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2577 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2578 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2579 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2580 tree update = TYPE_SIZE_UNIT (vectype);
2581 gimple incr_stmt;
2582 ssa_op_iter iter;
2583 use_operand_p use_p;
2584 tree new_dataref_ptr;
2586 if (bump)
2587 update = bump;
2589 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2590 dataref_ptr, update);
2591 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2592 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2593 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2595 /* Copy the points-to information if it exists. */
2596 if (DR_PTR_INFO (dr))
2597 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2599 if (!ptr_incr)
2600 return new_dataref_ptr;
2602 /* Update the vector-pointer's cross-iteration increment. */
2603 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2605 tree use = USE_FROM_PTR (use_p);
2607 if (use == dataref_ptr)
2608 SET_USE (use_p, new_dataref_ptr);
2609 else
2610 gcc_assert (tree_int_cst_compare (use, update) == 0);
2613 return new_dataref_ptr;
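/* Editorial sketch, not part of GCC: the arithmetic performed by
   bump_vector_ptr in plain C, using a hypothetical 16-byte vector type and
   assuming BUMP is the default (one vector).  The rewrite of PTR_INCR to
   start from the bumped pointer is only described in the comment above.  */
typedef int example_bump_v4si __attribute__ ((vector_size (16)));

static example_bump_v4si *
example_bump_vector_ptr (example_bump_v4si *dataref_ptr)
{
  /* NEW_DATAREF_PTR = DATAREF_PTR + BUMP; PTR_INCR is then rewritten to use
     NEW_DATAREF_PTR as its starting value.  */
  example_bump_v4si *new_dataref_ptr = dataref_ptr + 1;
  return new_dataref_ptr;
}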
2617 /* Function vect_create_destination_var.
2619 Create a new temporary of type VECTYPE. */
2621 tree
2622 vect_create_destination_var (tree scalar_dest, tree vectype)
2624 tree vec_dest;
2625 const char *new_name;
2626 tree type;
2627 enum vect_var_kind kind;
2629 kind = vectype ? vect_simple_var : vect_scalar_var;
2630 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2632 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2634 new_name = get_name (scalar_dest);
2635 if (!new_name)
2636 new_name = "var_";
2637 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2638 add_referenced_var (vec_dest);
2640 return vec_dest;
2643 /* Function vect_strided_store_supported.
2645 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2646 and FALSE otherwise. */
2648 bool
2649 vect_strided_store_supported (tree vectype)
2651 optab interleave_high_optab, interleave_low_optab;
2652 int mode;
2654 mode = (int) TYPE_MODE (vectype);
2656 /* Check that the operation is supported. */
2657 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2658 vectype, optab_default);
2659 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2660 vectype, optab_default);
2661 if (!interleave_high_optab || !interleave_low_optab)
2663 if (vect_print_dump_info (REPORT_DETAILS))
2664 fprintf (vect_dump, "no optab for interleave.");
2665 return false;
2668 if (optab_handler (interleave_high_optab, mode)->insn_code
2669 == CODE_FOR_nothing
2670 || optab_handler (interleave_low_optab, mode)->insn_code
2671 == CODE_FOR_nothing)
2673 if (vect_print_dump_info (REPORT_DETAILS))
2674 fprintf (vect_dump, "interleave op not supported by target.");
2675 return false;
2678 return true;
2682 /* Function vect_permute_store_chain.
2684 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2685 a power of 2, generate interleave_high/low stmts to reorder the data
2686 correctly for the stores. Return the final references for stores in
2687 RESULT_CHAIN.
2689 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2690 The input is 4 vectors each containing 8 elements. We assign a number to each
2691 element, the input sequence is:
2693 1st vec: 0 1 2 3 4 5 6 7
2694 2nd vec: 8 9 10 11 12 13 14 15
2695 3rd vec: 16 17 18 19 20 21 22 23
2696 4th vec: 24 25 26 27 28 29 30 31
2698 The output sequence should be:
2700 1st vec: 0 8 16 24 1 9 17 25
2701 2nd vec: 2 10 18 26 3 11 19 27
2702 3rd vec: 4 12 20 28 5 13 21 29
2703 4th vec: 6 14 22 30 7 15 23 31
2705 i.e., we interleave the contents of the four vectors in their order.
2707 We use interleave_high/low instructions to create such output. The input of
2708 each interleave_high/low operation is two vectors:
2709 1st vec 2nd vec
2710 0 1 2 3 4 5 6 7
2711 the even elements of the result vector are obtained left-to-right from the
2712 high/low elements of the first vector. The odd elements of the result are
2713 obtained left-to-right from the high/low elements of the second vector.
2714 The output of interleave_high will be: 0 4 1 5
2715 and of interleave_low: 2 6 3 7
2718 The permutation is done in log LENGTH stages. In each stage interleave_high
2719 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2720 where the first argument is taken from the first half of DR_CHAIN and the
2721 second argument from its second half.
2722 In our example,
2724 I1: interleave_high (1st vec, 3rd vec)
2725 I2: interleave_low (1st vec, 3rd vec)
2726 I3: interleave_high (2nd vec, 4th vec)
2727 I4: interleave_low (2nd vec, 4th vec)
2729 The output for the first stage is:
2731 I1: 0 16 1 17 2 18 3 19
2732 I2: 4 20 5 21 6 22 7 23
2733 I3: 8 24 9 25 10 26 11 27
2734 I4: 12 28 13 29 14 30 15 31
2736 The output of the second stage, i.e. the final result is:
2738 I1: 0 8 16 24 1 9 17 25
2739 I2: 2 10 18 26 3 11 19 27
2740 I3: 4 12 20 28 5 13 21 29
2741 I4: 6 14 22 30 7 15 23 31. */
2743 bool
2744 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2745 unsigned int length,
2746 gimple stmt,
2747 gimple_stmt_iterator *gsi,
2748 VEC(tree,heap) **result_chain)
2750 tree perm_dest, vect1, vect2, high, low;
2751 gimple perm_stmt;
2752 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2753 tree scalar_dest;
2754 int i;
2755 unsigned int j;
2756 enum tree_code high_code, low_code;
2758 scalar_dest = gimple_assign_lhs (stmt);
2760 /* Check that the operation is supported. */
2761 if (!vect_strided_store_supported (vectype))
2762 return false;
2764 *result_chain = VEC_copy (tree, heap, dr_chain);
2766 for (i = 0; i < exact_log2 (length); i++)
2768 for (j = 0; j < length/2; j++)
2770 vect1 = VEC_index (tree, dr_chain, j);
2771 vect2 = VEC_index (tree, dr_chain, j+length/2);
2773 /* Create interleaving stmt:
2774 in the case of big endian:
2775 high = interleave_high (vect1, vect2)
2776 and in the case of little endian:
2777 high = interleave_low (vect1, vect2). */
2778 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2779 DECL_GIMPLE_REG_P (perm_dest) = 1;
2780 add_referenced_var (perm_dest);
2781 if (BYTES_BIG_ENDIAN)
2783 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2784 low_code = VEC_INTERLEAVE_LOW_EXPR;
2786 else
2788 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2789 high_code = VEC_INTERLEAVE_LOW_EXPR;
2791 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2792 vect1, vect2);
2793 high = make_ssa_name (perm_dest, perm_stmt);
2794 gimple_assign_set_lhs (perm_stmt, high);
2795 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2796 VEC_replace (tree, *result_chain, 2*j, high);
2798 /* Create interleaving stmt:
2799 in the case of big endian:
2800 low = interleave_low (vect1, vect2)
2801 and in the case of little endian:
2802 low = interleave_high (vect1, vect2). */
2803 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2804 DECL_GIMPLE_REG_P (perm_dest) = 1;
2805 add_referenced_var (perm_dest);
2806 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2807 vect1, vect2);
2808 low = make_ssa_name (perm_dest, perm_stmt);
2809 gimple_assign_set_lhs (perm_stmt, low);
2810 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2811 VEC_replace (tree, *result_chain, 2*j+1, low);
2813 dr_chain = VEC_copy (tree, heap, *result_chain);
2815 return true;
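/* Editorial illustration, not part of GCC: a scalar model of the permutation
   documented above, for LENGTH == 4 vectors of 8 elements.  All example_*
   names are hypothetical.  interleave_high pairs up the first halves of its
   two operands and interleave_low the second halves (the big-endian
   convention used in the comment); running example_permute_store_chain on
   vectors holding 0..7, 8..15, 16..23, 24..31 yields exactly the final
   sequence shown above.  */
enum { EX_ST_NELT = 8 };

static void
example_interleave_high (const int *v1, const int *v2, int *out)
{
  int k;
  for (k = 0; k < EX_ST_NELT / 2; k++)
    {
      out[2 * k]     = v1[k];
      out[2 * k + 1] = v2[k];
    }
}

static void
example_interleave_low (const int *v1, const int *v2, int *out)
{
  int k;
  for (k = 0; k < EX_ST_NELT / 2; k++)
    {
      out[2 * k]     = v1[EX_ST_NELT / 2 + k];
      out[2 * k + 1] = v2[EX_ST_NELT / 2 + k];
    }
}

static void
example_permute_store_chain (int chain[4][EX_ST_NELT])
{
  int stage, j, r, c;
  int result[4][EX_ST_NELT];

  /* log2 (LENGTH) == 2 stages; in each stage the first operand comes from
     the first half of the chain and the second from its second half.  */
  for (stage = 0; stage < 2; stage++)
    {
      for (j = 0; j < 2; j++)
        {
          example_interleave_high (chain[j], chain[j + 2], result[2 * j]);
          example_interleave_low  (chain[j], chain[j + 2], result[2 * j + 1]);
        }
      for (r = 0; r < 4; r++)
        for (c = 0; c < EX_ST_NELT; c++)
          chain[r][c] = result[r][c];
    }
}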
2818 /* Function vect_setup_realignment
2820 This function is called when vectorizing an unaligned load using
2821 the dr_explicit_realign[_optimized] scheme.
2822 This function generates the following code at the loop prolog:
2824 p = initial_addr;
2825 x msq_init = *(floor(p)); # prolog load
2826 realignment_token = call target_builtin;
2827 loop:
2828 x msq = phi (msq_init, ---)
2830 The stmts marked with x are generated only for the case of
2831 dr_explicit_realign_optimized.
2833 The code above sets up a new (vector) pointer, pointing to the first
2834 location accessed by STMT, and a "floor-aligned" load using that pointer.
2835 It also generates code to compute the "realignment-token" (if the relevant
2836 target hook was defined), and creates a phi-node at the loop-header bb
2837 whose arguments are the result of the prolog-load (created by this
2838 function) and the result of a load that takes place in the loop (to be
2839 created by the caller to this function).
2841 For the case of dr_explicit_realign_optimized:
2842 The caller to this function uses the phi-result (msq) to create the
2843 realignment code inside the loop, and sets up the missing phi argument,
2844 as follows:
2845 loop:
2846 msq = phi (msq_init, lsq)
2847 lsq = *(floor(p')); # load in loop
2848 result = realign_load (msq, lsq, realignment_token);
2850 For the case of dr_explicit_realign:
2851 loop:
2852 msq = *(floor(p)); # load in loop
2853 p' = p + (VS-1);
2854 lsq = *(floor(p')); # load in loop
2855 result = realign_load (msq, lsq, realignment_token);
2857 Input:
2858 STMT - (scalar) load stmt to be vectorized. This load accesses
2859 a memory location that may be unaligned.
2860 BSI - place where new code is to be inserted.
2861 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2862 is used.
2864 Output:
2865 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2866 target hook, if defined.
2867 Return value - the result of the loop-header phi node. */
2869 tree
2870 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2871 tree *realignment_token,
2872 enum dr_alignment_support alignment_support_scheme,
2873 tree init_addr,
2874 struct loop **at_loop)
2876 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2877 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2878 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2879 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2880 edge pe;
2881 tree scalar_dest = gimple_assign_lhs (stmt);
2882 tree vec_dest;
2883 gimple inc;
2884 tree ptr;
2885 tree data_ref;
2886 gimple new_stmt;
2887 basic_block new_bb;
2888 tree msq_init = NULL_TREE;
2889 tree new_temp;
2890 gimple phi_stmt;
2891 tree msq = NULL_TREE;
2892 gimple_seq stmts = NULL;
2893 bool inv_p;
2894 bool compute_in_loop = false;
2895 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2896 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2897 struct loop *loop_for_initial_load;
2899 gcc_assert (alignment_support_scheme == dr_explicit_realign
2900 || alignment_support_scheme == dr_explicit_realign_optimized);
2902 /* We need to generate three things:
2903 1. the misalignment computation
2904 2. the extra vector load (for the optimized realignment scheme).
2905 3. the phi node for the two vectors from which the realignment is
2906 done (for the optimized realignment scheme).
2909 /* 1. Determine where to generate the misalignment computation.
2911 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2912 calculation will be generated by this function, outside the loop (in the
2913 preheader). Otherwise, INIT_ADDR had already been computed for us by the
2914 caller, inside the loop.
2916 Background: If the misalignment remains fixed throughout the iterations of
2917 the loop, then both realignment schemes are applicable, and also the
2918 misalignment computation can be done outside LOOP. This is because we are
2919 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2920 are a multiple of VS (the Vector Size), and therefore the misalignment in
2921 different vectorized LOOP iterations is always the same.
2922 The problem arises only if the memory access is in an inner-loop nested
2923 inside LOOP, which is now being vectorized using outer-loop vectorization.
2924 This is the only case when the misalignment of the memory access may not
2925 remain fixed throughout the iterations of the inner-loop (as explained in
2926 detail in vect_supportable_dr_alignment). In this case, not only is the
2927 optimized realignment scheme not applicable, but also the misalignment
2928 computation (and generation of the realignment token that is passed to
2929 REALIGN_LOAD) have to be done inside the loop.
2931 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2932 or not, which in turn determines if the misalignment is computed inside
2933 the inner-loop, or outside LOOP. */
2935 if (init_addr != NULL_TREE)
2937 compute_in_loop = true;
2938 gcc_assert (alignment_support_scheme == dr_explicit_realign);
2942 /* 2. Determine where to generate the extra vector load.
2944 For the optimized realignment scheme, instead of generating two vector
2945 loads in each iteration, we generate a single extra vector load in the
2946 preheader of the loop, and in each iteration reuse the result of the
2947 vector load from the previous iteration. In case the memory access is in
2948 an inner-loop nested inside LOOP, which is now being vectorized using
2949 outer-loop vectorization, we need to determine whether this initial vector
2950 load should be generated at the preheader of the inner-loop, or can be
2951 generated at the preheader of LOOP. If the memory access has no evolution
2952 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
2953 to be generated inside LOOP (in the preheader of the inner-loop). */
2955 if (nested_in_vect_loop)
2957 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
2958 bool invariant_in_outerloop =
2959 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
2960 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
2962 else
2963 loop_for_initial_load = loop;
2964 if (at_loop)
2965 *at_loop = loop_for_initial_load;
2967 /* 3. For the case of the optimized realignment, create the first vector
2968 load at the loop preheader. */
2970 if (alignment_support_scheme == dr_explicit_realign_optimized)
2972 /* Create msq_init = *(floor(p1)) in the loop preheader */
2974 gcc_assert (!compute_in_loop);
2975 pe = loop_preheader_edge (loop_for_initial_load);
2976 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2977 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
2978 &init_addr, &inc, true, &inv_p);
2979 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2980 new_stmt = gimple_build_assign (vec_dest, data_ref);
2981 new_temp = make_ssa_name (vec_dest, new_stmt);
2982 gimple_assign_set_lhs (new_stmt, new_temp);
2983 mark_symbols_for_renaming (new_stmt);
2984 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
2985 gcc_assert (!new_bb);
2986 msq_init = gimple_assign_lhs (new_stmt);
2989 /* 4. Create realignment token using a target builtin, if available.
2990 It is done either inside the containing loop, or before LOOP (as
2991 determined above). */
2993 if (targetm.vectorize.builtin_mask_for_load)
2995 tree builtin_decl;
2997 /* Compute INIT_ADDR - the initial address accessed by this memref. */
2998 if (compute_in_loop)
2999 gcc_assert (init_addr); /* already computed by the caller. */
3000 else
3002 /* Generate the INIT_ADDR computation outside LOOP. */
3003 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3004 NULL_TREE, loop);
3005 pe = loop_preheader_edge (loop);
3006 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3007 gcc_assert (!new_bb);
3010 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3011 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3012 vec_dest =
3013 vect_create_destination_var (scalar_dest,
3014 gimple_call_return_type (new_stmt));
3015 new_temp = make_ssa_name (vec_dest, new_stmt);
3016 gimple_call_set_lhs (new_stmt, new_temp);
3018 if (compute_in_loop)
3019 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3020 else
3022 /* Generate the misalignment computation outside LOOP. */
3023 pe = loop_preheader_edge (loop);
3024 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3025 gcc_assert (!new_bb);
3028 *realignment_token = gimple_call_lhs (new_stmt);
3030 /* The result of the CALL_EXPR to this builtin is determined from
3031 the value of the parameter and no global variables are touched
3032 which makes the builtin a "const" function. Requiring the
3033 builtin to have the "const" attribute makes it unnecessary
3034 to call mark_call_clobbered. */
3035 gcc_assert (TREE_READONLY (builtin_decl));
3038 if (alignment_support_scheme == dr_explicit_realign)
3039 return msq;
3041 gcc_assert (!compute_in_loop);
3042 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3045 /* 5. Create msq = phi <msq_init, lsq> in loop */
3047 pe = loop_preheader_edge (containing_loop);
3048 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3049 msq = make_ssa_name (vec_dest, NULL);
3050 phi_stmt = create_phi_node (msq, containing_loop->header);
3051 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3052 add_phi_arg (phi_stmt, msq_init, pe);
3054 return msq;
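/* Editorial sketch, not part of GCC: a scalar model of the explicit
   realignment scheme set up above, for 4-element int "vectors" with 16-byte
   alignment.  The names and the use of __SIZE_TYPE__ for the address
   arithmetic are assumptions of the example; the real code performs two
   aligned vector loads and combines them with REALIGN_LOAD using the
   realignment token.  */
enum { EX_VF = 4 };

static void
example_realign_load (const int *p, int *result)
{
  /* floor (p): round the address down to the vector boundary.  */
  const int *aligned = (const int *) ((__SIZE_TYPE__) p & ~(__SIZE_TYPE__) 15);
  int token = p - aligned;              /* realignment token: 0..EX_VF-1     */
  const int *msq = aligned;             /* msq = *(floor (p))                */
  const int *lsq = aligned + EX_VF;     /* lsq = *(floor (p + VS - 1))       */
  int k;

  /* result[k] == p[k]: an unaligned vector load realized by two aligned
     loads; when TOKEN is 0 the second load is never needed.  */
  for (k = 0; k < EX_VF; k++)
    result[k] = (token + k < EX_VF) ? msq[token + k] : lsq[token + k - EX_VF];
}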
3058 /* Function vect_strided_load_supported.
3060 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3061 and FALSE otherwise. */
3063 bool
3064 vect_strided_load_supported (tree vectype)
3066 optab perm_even_optab, perm_odd_optab;
3067 int mode;
3069 mode = (int) TYPE_MODE (vectype);
3071 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3072 optab_default);
3073 if (!perm_even_optab)
3075 if (vect_print_dump_info (REPORT_DETAILS))
3076 fprintf (vect_dump, "no optab for perm_even.");
3077 return false;
3080 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
3082 if (vect_print_dump_info (REPORT_DETAILS))
3083 fprintf (vect_dump, "perm_even op not supported by target.");
3084 return false;
3087 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3088 optab_default);
3089 if (!perm_odd_optab)
3091 if (vect_print_dump_info (REPORT_DETAILS))
3092 fprintf (vect_dump, "no optab for perm_odd.");
3093 return false;
3096 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
3098 if (vect_print_dump_info (REPORT_DETAILS))
3099 fprintf (vect_dump, "perm_odd op not supported by target.");
3100 return false;
3102 return true;
3106 /* Function vect_permute_load_chain.
3108 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3109 a power of 2, generate extract_even/odd stmts to reorder the input data
3110 correctly. Return the final references for loads in RESULT_CHAIN.
3112 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3113 The input is 4 vectors each containing 8 elements. We assign a number to each
3114 element, the input sequence is:
3116 1st vec: 0 1 2 3 4 5 6 7
3117 2nd vec: 8 9 10 11 12 13 14 15
3118 3rd vec: 16 17 18 19 20 21 22 23
3119 4th vec: 24 25 26 27 28 29 30 31
3121 The output sequence should be:
3123 1st vec: 0 4 8 12 16 20 24 28
3124 2nd vec: 1 5 9 13 17 21 25 29
3125 3rd vec: 2 6 10 14 18 22 26 30
3126 4th vec: 3 7 11 15 19 23 27 31
3128 i.e., the first output vector should contain the first elements of each
3129 interleaving group, etc.
3131 We use extract_even/odd instructions to create such output. The input of each
3132 extract_even/odd operation is two vectors
3133 1st vec 2nd vec
3134 0 1 2 3 4 5 6 7
3136 and the output is the vector of extracted even/odd elements. The output of
3137 extract_even will be: 0 2 4 6
3138 and of extract_odd: 1 3 5 7
3141 The permutation is done in log LENGTH stages. In each stage extract_even and
3142 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3143 order. In our example,
3145 E1: extract_even (1st vec, 2nd vec)
3146 E2: extract_odd (1st vec, 2nd vec)
3147 E3: extract_even (3rd vec, 4th vec)
3148 E4: extract_odd (3rd vec, 4th vec)
3150 The output for the first stage will be:
3152 E1: 0 2 4 6 8 10 12 14
3153 E2: 1 3 5 7 9 11 13 15
3154 E3: 16 18 20 22 24 26 28 30
3155 E4: 17 19 21 23 25 27 29 31
3157 In order to proceed and create the correct sequence for the next stage (or
3158 for the correct output, if the second stage is the last one, as in our
3159 example), we first put the output of extract_even operation and then the
3160 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3161 The input for the second stage is:
3163 1st vec (E1): 0 2 4 6 8 10 12 14
3164 2nd vec (E3): 16 18 20 22 24 26 28 30
3165 3rd vec (E2): 1 3 5 7 9 11 13 15
3166 4th vec (E4): 17 19 21 23 25 27 29 31
3168 The output of the second stage:
3170 E1: 0 4 8 12 16 20 24 28
3171 E2: 2 6 10 14 18 22 26 30
3172 E3: 1 5 9 13 17 21 25 29
3173 E4: 3 7 11 15 19 23 27 31
3175 And RESULT_CHAIN after reordering:
3177 1st vec (E1): 0 4 8 12 16 20 24 28
3178 2nd vec (E3): 1 5 9 13 17 21 25 29
3179 3rd vec (E2): 2 6 10 14 18 22 26 30
3180 4th vec (E4): 3 7 11 15 19 23 27 31. */
3182 bool
3183 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3184 unsigned int length,
3185 gimple stmt,
3186 gimple_stmt_iterator *gsi,
3187 VEC(tree,heap) **result_chain)
3189 tree perm_dest, data_ref, first_vect, second_vect;
3190 gimple perm_stmt;
3191 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3192 int i;
3193 unsigned int j;
3195 /* Check that the operation is supported. */
3196 if (!vect_strided_load_supported (vectype))
3197 return false;
3199 *result_chain = VEC_copy (tree, heap, dr_chain);
3200 for (i = 0; i < exact_log2 (length); i++)
3202 for (j = 0; j < length; j +=2)
3204 first_vect = VEC_index (tree, dr_chain, j);
3205 second_vect = VEC_index (tree, dr_chain, j+1);
3207 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3208 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3209 DECL_GIMPLE_REG_P (perm_dest) = 1;
3210 add_referenced_var (perm_dest);
3212 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3213 perm_dest, first_vect,
3214 second_vect);
3216 data_ref = make_ssa_name (perm_dest, perm_stmt);
3217 gimple_assign_set_lhs (perm_stmt, data_ref);
3218 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3219 mark_symbols_for_renaming (perm_stmt);
3221 VEC_replace (tree, *result_chain, j/2, data_ref);
3223 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3224 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3225 DECL_GIMPLE_REG_P (perm_dest) = 1;
3226 add_referenced_var (perm_dest);
3228 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3229 perm_dest, first_vect,
3230 second_vect);
3231 data_ref = make_ssa_name (perm_dest, perm_stmt);
3232 gimple_assign_set_lhs (perm_stmt, data_ref);
3233 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3234 mark_symbols_for_renaming (perm_stmt);
3236 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3238 dr_chain = VEC_copy (tree, heap, *result_chain);
3240 return true;
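/* Editorial illustration, not part of GCC: a scalar model of the two-stage
   extract_even/extract_odd permutation documented above, for LENGTH == 4
   vectors of 8 elements.  All example_* names are hypothetical; running
   example_permute_load_chain on vectors holding 0..7, 8..15, 16..23, 24..31
   leaves the chain holding the de-interleaved sequences shown in the comment
   (0 4 8 ... 28, 1 5 9 ... 29, and so on).  */
enum { EX_LD_NELT = 8 };

static void
example_extract_even_odd (const int *v1, const int *v2, int *even, int *odd)
{
  int k;
  for (k = 0; k < EX_LD_NELT / 2; k++)     /* elements taken from V1 ...     */
    {
      even[k] = v1[2 * k];
      odd[k]  = v1[2 * k + 1];
    }
  for (k = 0; k < EX_LD_NELT / 2; k++)     /* ... then those taken from V2   */
    {
      even[EX_LD_NELT / 2 + k] = v2[2 * k];
      odd[EX_LD_NELT / 2 + k]  = v2[2 * k + 1];
    }
}

static void
example_permute_load_chain (int chain[4][EX_LD_NELT])
{
  int stage, j, r, c;
  int result[4][EX_LD_NELT];

  /* log2 (LENGTH) == 2 stages; even outputs go to the first half of the
     result chain, odd outputs to its second half.  */
  for (stage = 0; stage < 2; stage++)
    {
      for (j = 0; j < 4; j += 2)
        example_extract_even_odd (chain[j], chain[j + 1],
                                  result[j / 2], result[j / 2 + 2]);
      for (r = 0; r < 4; r++)
        for (c = 0; c < EX_LD_NELT; c++)
          chain[r][c] = result[r][c];
    }
}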
3244 /* Function vect_transform_strided_load.
3246 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3247 to perform their permutation, and record the resulting vectorized
3248 statements in the corresponding scalar statements.
3251 bool
3252 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3253 gimple_stmt_iterator *gsi)
3255 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3256 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3257 gimple next_stmt, new_stmt;
3258 VEC(tree,heap) *result_chain = NULL;
3259 unsigned int i, gap_count;
3260 tree tmp_data_ref;
3262 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3263 RESULT_CHAIN is the output of vect_permute_load_chain, which contains the
3264 permuted vectors that are ready for vector computation. */
3265 result_chain = VEC_alloc (tree, heap, size);
3266 /* Permute. */
3267 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3268 return false;
3270 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3271 Since we scan the chain starting from its first node, their order
3272 corresponds the order of data-refs in RESULT_CHAIN. */
3273 next_stmt = first_stmt;
3274 gap_count = 1;
3275 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3277 if (!next_stmt)
3278 break;
3280 /* Skip the gaps. Loads created for the gaps will be removed by dead
3281 code elimination pass later. No need to check for the first stmt in
3282 the group, since it always exists.
3283 DR_GROUP_GAP is the number of steps in elements from the previous
3284 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3285 correspond to the gaps.
3287 if (next_stmt != first_stmt
3288 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3290 gap_count++;
3291 continue;
3294 while (next_stmt)
3296 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3297 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3298 copies, and we put the new vector statement in the first available
3299 RELATED_STMT. */
3300 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3301 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3302 else
3304 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3306 gimple prev_stmt =
3307 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3308 gimple rel_stmt =
3309 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3310 while (rel_stmt)
3312 prev_stmt = rel_stmt;
3313 rel_stmt =
3314 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3317 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3318 new_stmt;
3322 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3323 gap_count = 1;
3324 /* If NEXT_STMT accesses the same DR as the previous statement,
3325 put the same TMP_DATA_REF as its vectorized statement; otherwise
3326 get the next data-ref from RESULT_CHAIN. */
3327 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3328 break;
3332 VEC_free (tree, heap, result_chain);
3333 return true;
3336 /* Function vect_can_force_dr_alignment_p.
3338 Returns whether the alignment of a DECL can be forced to be aligned
3339 on an ALIGNMENT-bit boundary. */
3341 bool
3342 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3344 if (TREE_CODE (decl) != VAR_DECL)
3345 return false;
3347 if (DECL_EXTERNAL (decl))
3348 return false;
3350 if (TREE_ASM_WRITTEN (decl))
3351 return false;
3353 if (TREE_STATIC (decl))
3354 return (alignment <= MAX_OFILE_ALIGNMENT);
3355 else
3356 return (alignment <= MAX_STACK_ALIGNMENT);
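/* Editorial illustration, not part of GCC: the kind of decls whose alignment
   the vectorizer may force, assuming a 16-byte vector alignment.  The names
   are hypothetical.  A file-scope static and a stack array qualify (subject
   to the MAX_OFILE_ALIGNMENT/MAX_STACK_ALIGNMENT limits checked above); an
   external or already-emitted decl does not.  */
static float example_forced_static[256] __attribute__ ((aligned (16)));

void
example_force_decl_alignment (void)
{
  float example_forced_local[64] __attribute__ ((aligned (16)));
  example_forced_local[0] = example_forced_static[0];
  example_forced_static[1] = example_forced_local[0];
}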
3359 /* Function vect_supportable_dr_alignment
3361 Return whether the data reference DR is supported with respect to its
3362 alignment. */
3364 enum dr_alignment_support
3365 vect_supportable_dr_alignment (struct data_reference *dr)
3367 gimple stmt = DR_STMT (dr);
3368 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3369 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3370 enum machine_mode mode = TYPE_MODE (vectype);
3371 bool invariant_in_outerloop = false;
3372 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3373 struct loop *vect_loop = NULL;
3374 bool nested_in_vect_loop = false;
3376 if (aligned_access_p (dr))
3377 return dr_aligned;
3379 if (!loop_vinfo)
3380 /* FORNOW: Misaligned accesses are supported only in loops. */
3381 return dr_unaligned_unsupported;
3383 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3384 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3386 if (nested_in_vect_loop)
3388 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3389 invariant_in_outerloop =
3390 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3393 /* Possibly unaligned access. */
3395 /* We can choose between using the implicit realignment scheme (generating
3396 a misaligned_move stmt) and the explicit realignment scheme (generating
3397 aligned loads with a REALIGN_LOAD). There are two variants to the explicit
3398 realignment scheme: optimized, and unoptimized.
3399 We can optimize the realignment only if the step between consecutive
3400 vector loads is equal to the vector size. Since the vector memory
3401 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3402 is guaranteed that the misalignment amount remains the same throughout the
3403 execution of the vectorized loop. Therefore, we can create the
3404 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3405 at the loop preheader.
3407 However, in the case of outer-loop vectorization, when vectorizing a
3408 memory access in the inner-loop nested within the LOOP that is now being
3409 vectorized, while it is guaranteed that the misalignment of the
3410 vectorized memory access will remain the same in different outer-loop
3411 iterations, it is *not* guaranteed that it will remain the same throughout
3412 the execution of the inner-loop. This is because the inner-loop advances
3413 with the original scalar step (and not in steps of VS). If the inner-loop
3414 step happens to be a multiple of VS, then the misalignment remains fixed
3415 and we can use the optimized realignment scheme. For example:
3417 for (i=0; i<N; i++)
3418 for (j=0; j<M; j++)
3419 s += a[i+j];
3421 When vectorizing the i-loop in the above example, the step between
3422 consecutive vector loads is 1, and so the misalignment does not remain
3423 fixed across the execution of the inner-loop, and the realignment cannot
3424 be optimized (as illustrated in the following pseudo vectorized loop):
3426 for (i=0; i<N; i+=4)
3427 for (j=0; j<M; j++){
3428 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3429 // when j is {0,1,2,3,4,5,6,7,...} respectively.
3430 // (assuming that we start from an aligned address).
3433 We therefore have to use the unoptimized realignment scheme:
3435 for (i=0; i<N; i+=4)
3436 for (j=k; j<M; j+=4)
3437 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3438 // that the misalignment of the initial address is
3439 // 0).
3441 The loop can then be vectorized as follows:
3443 for (k=0; k<4; k++){
3444 rt = get_realignment_token (&vp[k]);
3445 for (i=0; i<N; i+=4){
3446 v1 = vp[i+k];
3447 for (j=k; j<M; j+=4){
3448 v2 = vp[i+j+VS-1];
3449 va = REALIGN_LOAD <v1,v2,rt>;
3450 vs += va;
3451 v1 = v2;
3454 } */
3456 if (DR_IS_READ (dr))
3458 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3459 CODE_FOR_nothing
3460 && (!targetm.vectorize.builtin_mask_for_load
3461 || targetm.vectorize.builtin_mask_for_load ()))
3463 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3464 if (nested_in_vect_loop
3465 && (TREE_INT_CST_LOW (DR_STEP (dr))
3466 != GET_MODE_SIZE (TYPE_MODE (vectype))))
3467 return dr_explicit_realign;
3468 else
3469 return dr_explicit_realign_optimized;
3472 if (optab_handler (movmisalign_optab, mode)->insn_code !=
3473 CODE_FOR_nothing)
3474 /* Can't software pipeline the loads, but can at least do them. */
3475 return dr_unaligned_supported;
3477 else
3479 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
3480 return dr_unaligned_supported;
3483 /* Unsupported. */
3484 return dr_unaligned_unsupported;
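/* Editorial illustration, not part of GCC: a loop whose accesses have unknown
   compile-time alignment, as classified by the function above.  The names are
   hypothetical.  For the load the function may return dr_explicit_realign or
   dr_explicit_realign_optimized (when the target provides vec_realign_load
   and, if defined, the mask-for-load builtin) or dr_unaligned_supported
   (movmisalign); for the store only the movmisalign path or
   dr_unaligned_unsupported applies.  */
void
example_possibly_unaligned (float *dst, const float *src, int n)
{
  int i;
  for (i = 0; i < n; i++)
    dst[i] = src[i];   /* alignment of DST and SRC is unknown here */
}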