20090811-1.c: Skip for incompatible options, do not override other options.
[official-gcc.git] / gcc / tree-vect-patterns.c
blobf0add8c60743c28e3dacc1444aef025b8127db83
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
32 #include "cfgloop.h"
33 #include "expr.h"
34 #include "optabs.h"
35 #include "params.h"
36 #include "tree-data-ref.h"
37 #include "tree-vectorizer.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
41 /* Pattern recognition functions */
42 static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *);
43 static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *);
44 static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *);
45 static gimple vect_recog_pow_pattern (gimple *, tree *, tree *);
46 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
47 vect_recog_widen_mult_pattern,
48 vect_recog_widen_sum_pattern,
49 vect_recog_dot_prod_pattern,
50 vect_recog_pow_pattern};
53 /* Function widened_name_p
55 Check whether NAME, an ssa-name used in USE_STMT,
56 is a result of a type-promotion, such that:
57 DEF_STMT: NAME = NOP (name0)
58 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
59 If CHECK_SIGN is TRUE, check that either both types are signed or both are
60 unsigned. */
62 static bool
63 widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
64 bool check_sign)
66 tree dummy;
67 gimple dummy_gimple;
68 loop_vec_info loop_vinfo;
69 stmt_vec_info stmt_vinfo;
70 tree type = TREE_TYPE (name);
71 tree oprnd0;
72 enum vect_def_type dt;
73 tree def;
75 stmt_vinfo = vinfo_for_stmt (use_stmt);
76 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
78 if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
79 return false;
81 if (dt != vect_internal_def
82 && dt != vect_external_def && dt != vect_constant_def)
83 return false;
85 if (! *def_stmt)
86 return false;
88 if (!is_gimple_assign (*def_stmt))
89 return false;
91 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
92 return false;
94 oprnd0 = gimple_assign_rhs1 (*def_stmt);
96 *half_type = TREE_TYPE (oprnd0);
97 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
98 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
99 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
100 return false;
102 if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
103 &dt))
104 return false;
106 return true;
109 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
110 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
112 static tree
113 vect_recog_temp_ssa_var (tree type, gimple stmt)
115 tree var = create_tmp_var (type, "patt");
117 add_referenced_var (var);
118 var = make_ssa_name (var, stmt);
119 return var;
122 /* Function vect_recog_dot_prod_pattern
124 Try to find the following pattern:
126 type x_t, y_t;
127 TYPE1 prod;
128 TYPE2 sum = init;
129 loop:
130 sum_0 = phi <init, sum_1>
131 S1 x_t = ...
132 S2 y_t = ...
133 S3 x_T = (TYPE1) x_t;
134 S4 y_T = (TYPE1) y_t;
135 S5 prod = x_T * y_T;
136 [S6 prod = (TYPE2) prod; #optional]
137 S7 sum_1 = prod + sum_0;
139 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
140 same size of 'TYPE1' or bigger. This is a special case of a reduction
141 computation.
143 Input:
145 * LAST_STMT: A stmt from which the pattern search begins. In the example,
146 when this function is called with S7, the pattern {S3,S4,S5,S6,S7} will be
147 detected.
149 Output:
151 * TYPE_IN: The type of the input arguments to the pattern.
153 * TYPE_OUT: The type of the output of this pattern.
155 * Return value: A new stmt that will be used to replace the sequence of
156 stmts that constitute the pattern. In this case it will be:
157 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
159 Note: The dot-prod idiom is a widening reduction pattern that is
160 vectorized without preserving all the intermediate results. It
161 produces only N/2 (widened) results (by summing up pairs of
162 intermediate results) rather than all N results. Therefore, we
163 cannot allow this pattern when we want to get all the results and in
164 the correct order (as is the case when this computation is in an
165 inner-loop nested in an outer-loop that us being vectorized). */
167 static gimple
168 vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
170 gimple stmt;
171 tree oprnd0, oprnd1;
172 tree oprnd00, oprnd01;
173 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
174 tree type, half_type;
175 gimple pattern_stmt;
176 tree prod_type;
177 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
178 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
179 tree var;
181 if (!is_gimple_assign (*last_stmt))
182 return NULL;
184 type = gimple_expr_type (*last_stmt);
186 /* Look for the following pattern
187 DX = (TYPE1) X;
188 DY = (TYPE1) Y;
189 DPROD = DX * DY;
190 DDPROD = (TYPE2) DPROD;
191 sum_1 = DDPROD + sum_0;
192 In which
193 - DX is double the size of X
194 - DY is double the size of Y
195 - DX, DY, DPROD all have the same type
196 - sum is the same size of DPROD or bigger
197 - sum has been recognized as a reduction variable.
199 This is equivalent to:
200 DPROD = X w* Y; #widen mult
201 sum_1 = DPROD w+ sum_0; #widen summation
203 DPROD = X w* Y; #widen mult
204 sum_1 = DPROD + sum_0; #summation
207 /* Starting from LAST_STMT, follow the defs of its uses in search
208 of the above pattern. */
210 if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
211 return NULL;
213 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
215 /* Has been detected as widening-summation? */
217 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
218 type = gimple_expr_type (stmt);
219 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
220 return NULL;
221 oprnd0 = gimple_assign_rhs1 (stmt);
222 oprnd1 = gimple_assign_rhs2 (stmt);
223 half_type = TREE_TYPE (oprnd0);
225 else
227 gimple def_stmt;
229 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
230 return NULL;
231 oprnd0 = gimple_assign_rhs1 (*last_stmt);
232 oprnd1 = gimple_assign_rhs2 (*last_stmt);
233 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
234 || !types_compatible_p (TREE_TYPE (oprnd1), type))
235 return NULL;
236 stmt = *last_stmt;
238 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
240 stmt = def_stmt;
241 oprnd0 = gimple_assign_rhs1 (stmt);
243 else
244 half_type = type;
247 /* So far so good. Since *last_stmt was detected as a (summation) reduction,
248 we know that oprnd1 is the reduction variable (defined by a loop-header
249 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
250 Left to check that oprnd0 is defined by a (widen_)mult_expr */
252 prod_type = half_type;
253 stmt = SSA_NAME_DEF_STMT (oprnd0);
255 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
256 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
257 return NULL;
259 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
260 inside the loop (in case we are analyzing an outer-loop). */
261 if (!is_gimple_assign (stmt))
262 return NULL;
263 stmt_vinfo = vinfo_for_stmt (stmt);
264 gcc_assert (stmt_vinfo);
265 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
266 return NULL;
267 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
268 return NULL;
269 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
271 /* Has been detected as a widening multiplication? */
273 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
274 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
275 return NULL;
276 stmt_vinfo = vinfo_for_stmt (stmt);
277 gcc_assert (stmt_vinfo);
278 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
279 oprnd00 = gimple_assign_rhs1 (stmt);
280 oprnd01 = gimple_assign_rhs2 (stmt);
282 else
284 tree half_type0, half_type1;
285 gimple def_stmt;
286 tree oprnd0, oprnd1;
288 oprnd0 = gimple_assign_rhs1 (stmt);
289 oprnd1 = gimple_assign_rhs2 (stmt);
290 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
291 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
292 return NULL;
293 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
294 return NULL;
295 oprnd00 = gimple_assign_rhs1 (def_stmt);
296 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
297 return NULL;
298 oprnd01 = gimple_assign_rhs1 (def_stmt);
299 if (!types_compatible_p (half_type0, half_type1))
300 return NULL;
301 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
302 return NULL;
305 half_type = TREE_TYPE (oprnd00);
306 *type_in = half_type;
307 *type_out = type;
309 /* Pattern detected. Create a stmt to be used to replace the pattern: */
310 var = vect_recog_temp_ssa_var (type, NULL);
311 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
312 oprnd00, oprnd01, oprnd1);
314 if (vect_print_dump_info (REPORT_DETAILS))
316 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
317 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
320 /* We don't allow changing the order of the computation in the inner-loop
321 when doing outer-loop vectorization. */
322 gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
324 return pattern_stmt;
327 /* Function vect_recog_widen_mult_pattern
329 Try to find the following pattern:
331 type a_t, b_t;
332 TYPE a_T, b_T, prod_T;
334 S1 a_t = ;
335 S2 b_t = ;
336 S3 a_T = (TYPE) a_t;
337 S4 b_T = (TYPE) b_t;
338 S5 prod_T = a_T * b_T;
340 where type 'TYPE' is at least double the size of type 'type'.
342 Also detect unsgigned cases:
344 unsigned type a_t, b_t;
345 unsigned TYPE u_prod_T;
346 TYPE a_T, b_T, prod_T;
348 S1 a_t = ;
349 S2 b_t = ;
350 S3 a_T = (TYPE) a_t;
351 S4 b_T = (TYPE) b_t;
352 S5 prod_T = a_T * b_T;
353 S6 u_prod_T = (unsigned TYPE) prod_T;
355 and multiplication by constants:
357 type a_t;
358 TYPE a_T, prod_T;
360 S1 a_t = ;
361 S3 a_T = (TYPE) a_t;
362 S5 prod_T = a_T * CONST;
364 Input:
366 * LAST_STMT: A stmt from which the pattern search begins. In the example,
367 when this function is called with S5, the pattern {S3,S4,S5,(S6)} is
368 detected.
370 Output:
372 * TYPE_IN: The type of the input arguments to the pattern.
374 * TYPE_OUT: The type of the output of this pattern.
376 * Return value: A new stmt that will be used to replace the sequence of
377 stmts that constitute the pattern. In this case it will be:
378 WIDEN_MULT <a_t, b_t>
381 static gimple
382 vect_recog_widen_mult_pattern (gimple *last_stmt,
383 tree *type_in,
384 tree *type_out)
386 gimple def_stmt0, def_stmt1;
387 tree oprnd0, oprnd1;
388 tree type, half_type0, half_type1;
389 gimple pattern_stmt;
390 tree vectype, vectype_out = NULL_TREE;
391 tree dummy;
392 tree var;
393 enum tree_code dummy_code;
394 int dummy_int;
395 VEC (tree, heap) *dummy_vec;
396 bool op0_ok, op1_ok;
398 if (!is_gimple_assign (*last_stmt))
399 return NULL;
401 type = gimple_expr_type (*last_stmt);
403 /* Starting from LAST_STMT, follow the defs of its uses in search
404 of the above pattern. */
406 if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR)
407 return NULL;
409 oprnd0 = gimple_assign_rhs1 (*last_stmt);
410 oprnd1 = gimple_assign_rhs2 (*last_stmt);
411 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
412 || !types_compatible_p (TREE_TYPE (oprnd1), type))
413 return NULL;
415 /* Check argument 0. */
416 op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false);
417 /* Check argument 1. */
418 op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false);
420 /* In case of multiplication by a constant one of the operands may not match
421 the pattern, but not both. */
422 if (!op0_ok && !op1_ok)
423 return NULL;
425 if (op0_ok && op1_ok)
427 oprnd0 = gimple_assign_rhs1 (def_stmt0);
428 oprnd1 = gimple_assign_rhs1 (def_stmt1);
430 else if (!op0_ok)
432 if (CONSTANT_CLASS_P (oprnd0)
433 && TREE_CODE (half_type1) == INTEGER_TYPE
434 && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1))
435 && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0))
437 /* OPRND0 is a constant of HALF_TYPE1. */
438 half_type0 = half_type1;
439 oprnd1 = gimple_assign_rhs1 (def_stmt1);
441 else
442 return NULL;
444 else if (!op1_ok)
446 if (CONSTANT_CLASS_P (oprnd1)
447 && TREE_CODE (half_type0) == INTEGER_TYPE
448 && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0))
449 && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1))
451 /* OPRND1 is a constant of HALF_TYPE0. */
452 half_type1 = half_type0;
453 oprnd0 = gimple_assign_rhs1 (def_stmt0);
455 else
456 return NULL;
459 /* Handle unsigned case. Look for
460 S6 u_prod_T = (unsigned TYPE) prod_T;
461 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
462 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
464 tree lhs = gimple_assign_lhs (*last_stmt), use_lhs;
465 imm_use_iterator imm_iter;
466 use_operand_p use_p;
467 int nuses = 0;
468 gimple use_stmt = NULL;
469 tree use_type;
471 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
472 return NULL;
474 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
476 use_stmt = USE_STMT (use_p);
477 nuses++;
480 if (nuses != 1 || !is_gimple_assign (use_stmt)
481 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
482 return NULL;
484 use_lhs = gimple_assign_lhs (use_stmt);
485 use_type = TREE_TYPE (use_lhs);
486 if (!INTEGRAL_TYPE_P (use_type)
487 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
488 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
489 return NULL;
491 type = use_type;
492 *last_stmt = use_stmt;
495 if (!types_compatible_p (half_type0, half_type1))
496 return NULL;
498 /* Pattern detected. */
499 if (vect_print_dump_info (REPORT_DETAILS))
500 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
502 /* Check target support */
503 vectype = get_vectype_for_scalar_type (half_type0);
504 vectype_out = get_vectype_for_scalar_type (type);
505 if (!vectype
506 || !vectype_out
507 || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt,
508 vectype_out, vectype,
509 &dummy, &dummy, &dummy_code,
510 &dummy_code, &dummy_int, &dummy_vec))
511 return NULL;
513 *type_in = vectype;
514 *type_out = vectype_out;
516 /* Pattern supported. Create a stmt to be used to replace the pattern: */
517 var = vect_recog_temp_ssa_var (type, NULL);
518 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
519 oprnd1);
520 SSA_NAME_DEF_STMT (var) = pattern_stmt;
522 if (vect_print_dump_info (REPORT_DETAILS))
523 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
525 return pattern_stmt;
529 /* Function vect_recog_pow_pattern
531 Try to find the following pattern:
533 x = POW (y, N);
535 with POW being one of pow, powf, powi, powif and N being
536 either 2 or 0.5.
538 Input:
540 * LAST_STMT: A stmt from which the pattern search begins.
542 Output:
544 * TYPE_IN: The type of the input arguments to the pattern.
546 * TYPE_OUT: The type of the output of this pattern.
548 * Return value: A new stmt that will be used to replace the sequence of
549 stmts that constitute the pattern. In this case it will be:
550 x = x * x
552 x = sqrt (x)
555 static gimple
556 vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
558 tree fn, base, exp = NULL;
559 gimple stmt;
560 tree var;
562 if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL)
563 return NULL;
565 fn = gimple_call_fndecl (*last_stmt);
566 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
567 return NULL;
569 switch (DECL_FUNCTION_CODE (fn))
571 case BUILT_IN_POWIF:
572 case BUILT_IN_POWI:
573 case BUILT_IN_POWF:
574 case BUILT_IN_POW:
575 base = gimple_call_arg (*last_stmt, 0);
576 exp = gimple_call_arg (*last_stmt, 1);
577 if (TREE_CODE (exp) != REAL_CST
578 && TREE_CODE (exp) != INTEGER_CST)
579 return NULL;
580 break;
582 default:
583 return NULL;
586 /* We now have a pow or powi builtin function call with a constant
587 exponent. */
589 *type_out = NULL_TREE;
591 /* Catch squaring. */
592 if ((host_integerp (exp, 0)
593 && tree_low_cst (exp, 0) == 2)
594 || (TREE_CODE (exp) == REAL_CST
595 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
597 *type_in = TREE_TYPE (base);
599 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
600 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
601 SSA_NAME_DEF_STMT (var) = stmt;
602 return stmt;
605 /* Catch square root. */
606 if (TREE_CODE (exp) == REAL_CST
607 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
609 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
610 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
611 if (*type_in)
613 gimple stmt = gimple_build_call (newfn, 1, base);
614 if (vectorizable_function (stmt, *type_in, *type_in)
615 != NULL_TREE)
617 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
618 gimple_call_set_lhs (stmt, var);
619 return stmt;
624 return NULL;
628 /* Function vect_recog_widen_sum_pattern
630 Try to find the following pattern:
632 type x_t;
633 TYPE x_T, sum = init;
634 loop:
635 sum_0 = phi <init, sum_1>
636 S1 x_t = *p;
637 S2 x_T = (TYPE) x_t;
638 S3 sum_1 = x_T + sum_0;
640 where type 'TYPE' is at least double the size of type 'type', i.e - we're
641 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
642 a special case of a reduction computation.
644 Input:
646 * LAST_STMT: A stmt from which the pattern search begins. In the example,
647 when this function is called with S3, the pattern {S2,S3} will be detected.
649 Output:
651 * TYPE_IN: The type of the input arguments to the pattern.
653 * TYPE_OUT: The type of the output of this pattern.
655 * Return value: A new stmt that will be used to replace the sequence of
656 stmts that constitute the pattern. In this case it will be:
657 WIDEN_SUM <x_t, sum_0>
659 Note: The widening-sum idiom is a widening reduction pattern that is
660 vectorized without preserving all the intermediate results. It
661 produces only N/2 (widened) results (by summing up pairs of
662 intermediate results) rather than all N results. Therefore, we
663 cannot allow this pattern when we want to get all the results and in
664 the correct order (as is the case when this computation is in an
665 inner-loop nested in an outer-loop that us being vectorized). */
667 static gimple
668 vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
670 gimple stmt;
671 tree oprnd0, oprnd1;
672 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
673 tree type, half_type;
674 gimple pattern_stmt;
675 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
676 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
677 tree var;
679 if (!is_gimple_assign (*last_stmt))
680 return NULL;
682 type = gimple_expr_type (*last_stmt);
684 /* Look for the following pattern
685 DX = (TYPE) X;
686 sum_1 = DX + sum_0;
687 In which DX is at least double the size of X, and sum_1 has been
688 recognized as a reduction variable.
691 /* Starting from LAST_STMT, follow the defs of its uses in search
692 of the above pattern. */
694 if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
695 return NULL;
697 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
698 return NULL;
700 oprnd0 = gimple_assign_rhs1 (*last_stmt);
701 oprnd1 = gimple_assign_rhs2 (*last_stmt);
702 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
703 || !types_compatible_p (TREE_TYPE (oprnd1), type))
704 return NULL;
706 /* So far so good. Since *last_stmt was detected as a (summation) reduction,
707 we know that oprnd1 is the reduction variable (defined by a loop-header
708 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
709 Left to check that oprnd0 is defined by a cast from type 'type' to type
710 'TYPE'. */
712 if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true))
713 return NULL;
715 oprnd0 = gimple_assign_rhs1 (stmt);
716 *type_in = half_type;
717 *type_out = type;
719 /* Pattern detected. Create a stmt to be used to replace the pattern: */
720 var = vect_recog_temp_ssa_var (type, NULL);
721 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
722 oprnd0, oprnd1);
723 SSA_NAME_DEF_STMT (var) = pattern_stmt;
725 if (vect_print_dump_info (REPORT_DETAILS))
727 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
728 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
731 /* We don't allow changing the order of the computation in the inner-loop
732 when doing outer-loop vectorization. */
733 gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
735 return pattern_stmt;
739 /* Function vect_pattern_recog_1
741 Input:
742 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
743 computation pattern.
744 STMT: A stmt from which the pattern search should start.
746 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
747 expression that computes the same functionality and can be used to
748 replace the sequence of stmts that are involved in the pattern.
750 Output:
751 This function checks if the expression returned by PATTERN_RECOG_FUNC is
752 supported in vector form by the target. We use 'TYPE_IN' to obtain the
753 relevant vector type. If 'TYPE_IN' is already a vector type, then this
754 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
755 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
756 to the available target pattern.
758 This function also does some bookkeeping, as explained in the documentation
759 for vect_recog_pattern. */
761 static void
762 vect_pattern_recog_1 (
763 gimple (* vect_recog_func) (gimple *, tree *, tree *),
764 gimple_stmt_iterator si)
766 gimple stmt = gsi_stmt (si), pattern_stmt;
767 stmt_vec_info stmt_info;
768 stmt_vec_info pattern_stmt_info;
769 loop_vec_info loop_vinfo;
770 tree pattern_vectype;
771 tree type_in, type_out;
772 enum tree_code code;
773 int i;
774 gimple next;
776 pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out);
777 if (!pattern_stmt)
778 return;
780 si = gsi_for_stmt (stmt);
781 stmt_info = vinfo_for_stmt (stmt);
782 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
784 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
786 /* No need to check target support (already checked by the pattern
787 recognition function). */
788 if (type_out)
789 gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out)));
790 pattern_vectype = type_out ? type_out : type_in;
792 else
794 enum machine_mode vec_mode;
795 enum insn_code icode;
796 optab optab;
798 /* Check target support */
799 type_in = get_vectype_for_scalar_type (type_in);
800 if (!type_in)
801 return;
802 if (type_out)
803 type_out = get_vectype_for_scalar_type (type_out);
804 else
805 type_out = type_in;
806 if (!type_out)
807 return;
808 pattern_vectype = type_out;
810 if (is_gimple_assign (pattern_stmt))
811 code = gimple_assign_rhs_code (pattern_stmt);
812 else
814 gcc_assert (is_gimple_call (pattern_stmt));
815 code = CALL_EXPR;
818 optab = optab_for_tree_code (code, type_in, optab_default);
819 vec_mode = TYPE_MODE (type_in);
820 if (!optab
821 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
822 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
823 return;
826 /* Found a vectorizable pattern. */
827 if (vect_print_dump_info (REPORT_DETAILS))
829 fprintf (vect_dump, "pattern recognized: ");
830 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
833 /* Mark the stmts that are involved in the pattern. */
834 gsi_insert_before (&si, pattern_stmt, GSI_SAME_STMT);
835 set_vinfo_for_stmt (pattern_stmt,
836 new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
837 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
839 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
840 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info);
841 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
842 STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
843 STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt;
845 /* Patterns cannot be vectorized using SLP, because they change the order of
846 computation. */
847 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
848 if (next == stmt)
849 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
853 /* Function vect_pattern_recog
855 Input:
856 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
857 computation idioms.
859 Output - for each computation idiom that is detected we insert a new stmt
860 that provides the same functionality and that can be vectorized. We
861 also record some information in the struct_stmt_info of the relevant
862 stmts, as explained below:
864 At the entry to this function we have the following stmts, with the
865 following initial value in the STMT_VINFO fields:
867 stmt in_pattern_p related_stmt vec_stmt
868 S1: a_i = .... - - -
869 S2: a_2 = ..use(a_i).. - - -
870 S3: a_1 = ..use(a_2).. - - -
871 S4: a_0 = ..use(a_1).. - - -
872 S5: ... = ..use(a_0).. - - -
874 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
875 represented by a single stmt. We then:
876 - create a new stmt S6 that will replace the pattern.
877 - insert the new stmt S6 before the last stmt in the pattern
878 - fill in the STMT_VINFO fields as follows:
880 in_pattern_p related_stmt vec_stmt
881 S1: a_i = .... - - -
882 S2: a_2 = ..use(a_i).. - - -
883 S3: a_1 = ..use(a_2).. - - -
884 > S6: a_new = .... - S4 -
885 S4: a_0 = ..use(a_1).. true S6 -
886 S5: ... = ..use(a_0).. - - -
888 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
889 to each other through the RELATED_STMT field).
891 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
892 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
893 remain irrelevant unless used by stmts other than S4.
895 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
896 (because they are marked as irrelevant). It will vectorize S6, and record
897 a pointer to the new vector stmt VS6 both from S6 (as usual), and also
898 from S4. We do that so that when we get to vectorizing stmts that use the
899 def of S4 (like S5 that uses a_0), we'll know where to take the relevant
900 vector-def from. S4 will be skipped, and S5 will be vectorized as usual:
902 in_pattern_p related_stmt vec_stmt
903 S1: a_i = .... - - -
904 S2: a_2 = ..use(a_i).. - - -
905 S3: a_1 = ..use(a_2).. - - -
906 > VS6: va_new = .... - - -
907 S6: a_new = .... - S4 VS6
908 S4: a_0 = ..use(a_1).. true S6 VS6
909 > VS5: ... = ..vuse(va_new).. - - -
910 S5: ... = ..use(a_0).. - - -
912 DCE could then get rid of {S1,S2,S3,S4,S5,S6} (if their defs are not used
913 elsewhere), and we'll end up with:
915 VS6: va_new = ....
916 VS5: ... = ..vuse(va_new)..
918 If vectorization does not succeed, DCE will clean S6 away (its def is
919 not used), and we'll end up with the original sequence.
922 void
923 vect_pattern_recog (loop_vec_info loop_vinfo)
925 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
926 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
927 unsigned int nbbs = loop->num_nodes;
928 gimple_stmt_iterator si;
929 unsigned int i, j;
930 gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
932 if (vect_print_dump_info (REPORT_DETAILS))
933 fprintf (vect_dump, "=== vect_pattern_recog ===");
935 /* Scan through the loop stmts, applying the pattern recognition
936 functions starting at each stmt visited: */
937 for (i = 0; i < nbbs; i++)
939 basic_block bb = bbs[i];
940 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
942 /* Scan over all generic vect_recog_xxx_pattern functions. */
943 for (j = 0; j < NUM_PATTERNS; j++)
945 vect_recog_func_ptr = vect_vect_recog_func_ptrs[j];
946 vect_pattern_recog_1 (vect_recog_func_ptr, si);