2011-10-07 Tom de Vries <tom@codesourcery.com>
[official-gcc.git] / gcc / tree-vect-patterns.c
bloba47b87b6f1c63a2cf6eeaa610cef807ab6b151ac
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-data-ref.h"
38 #include "tree-vectorizer.h"
39 #include "recog.h"
40 #include "diagnostic-core.h"
42 /* Pattern recognition functions */
43 static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
44 tree *);
45 static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
46 tree *);
47 static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
48 tree *);
49 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
50 static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
51 tree *);
52 static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
53 tree *, tree *);
54 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
55 vect_recog_widen_mult_pattern,
56 vect_recog_widen_sum_pattern,
57 vect_recog_dot_prod_pattern,
58 vect_recog_pow_pattern,
59 vect_recog_over_widening_pattern,
60 vect_recog_mixed_size_cond_pattern};
63 /* Function widened_name_p
65 Check whether NAME, an ssa-name used in USE_STMT,
66 is a result of a type-promotion, such that:
67 DEF_STMT: NAME = NOP (name0)
68 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
69 If CHECK_SIGN is TRUE, check that either both types are signed or both are
70 unsigned. */
72 static bool
73 widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
74 bool check_sign)
76 tree dummy;
77 gimple dummy_gimple;
78 loop_vec_info loop_vinfo;
79 stmt_vec_info stmt_vinfo;
80 tree type = TREE_TYPE (name);
81 tree oprnd0;
82 enum vect_def_type dt;
83 tree def;
85 stmt_vinfo = vinfo_for_stmt (use_stmt);
86 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
88 if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
89 return false;
91 if (dt != vect_internal_def
92 && dt != vect_external_def && dt != vect_constant_def)
93 return false;
95 if (! *def_stmt)
96 return false;
98 if (!is_gimple_assign (*def_stmt))
99 return false;
101 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
102 return false;
104 oprnd0 = gimple_assign_rhs1 (*def_stmt);
106 *half_type = TREE_TYPE (oprnd0);
107 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
108 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
109 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
110 return false;
112 if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
113 &dt))
114 return false;
116 return true;
119 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
120 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
122 static tree
123 vect_recog_temp_ssa_var (tree type, gimple stmt)
125 tree var = create_tmp_var (type, "patt");
127 add_referenced_var (var);
128 var = make_ssa_name (var, stmt);
129 return var;
132 /* Function vect_recog_dot_prod_pattern
134 Try to find the following pattern:
136 type x_t, y_t;
137 TYPE1 prod;
138 TYPE2 sum = init;
139 loop:
140 sum_0 = phi <init, sum_1>
141 S1 x_t = ...
142 S2 y_t = ...
143 S3 x_T = (TYPE1) x_t;
144 S4 y_T = (TYPE1) y_t;
145 S5 prod = x_T * y_T;
146 [S6 prod = (TYPE2) prod; #optional]
147 S7 sum_1 = prod + sum_0;
149 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
150 same size of 'TYPE1' or bigger. This is a special case of a reduction
151 computation.
153 Input:
155 * STMTS: Contains a stmt from which the pattern search begins. In the
156 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
157 will be detected.
159 Output:
161 * TYPE_IN: The type of the input arguments to the pattern.
163 * TYPE_OUT: The type of the output of this pattern.
165 * Return value: A new stmt that will be used to replace the sequence of
166 stmts that constitute the pattern. In this case it will be:
167 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
169 Note: The dot-prod idiom is a widening reduction pattern that is
170 vectorized without preserving all the intermediate results. It
171 produces only N/2 (widened) results (by summing up pairs of
172 intermediate results) rather than all N results. Therefore, we
173 cannot allow this pattern when we want to get all the results and in
174 the correct order (as is the case when this computation is in an
175 inner-loop nested in an outer-loop that us being vectorized). */
177 static gimple
178 vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
179 tree *type_out)
181 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
182 tree oprnd0, oprnd1;
183 tree oprnd00, oprnd01;
184 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
185 tree type, half_type;
186 gimple pattern_stmt;
187 tree prod_type;
188 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
189 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
190 tree var;
192 if (!is_gimple_assign (last_stmt))
193 return NULL;
195 type = gimple_expr_type (last_stmt);
197 /* Look for the following pattern
198 DX = (TYPE1) X;
199 DY = (TYPE1) Y;
200 DPROD = DX * DY;
201 DDPROD = (TYPE2) DPROD;
202 sum_1 = DDPROD + sum_0;
203 In which
204 - DX is double the size of X
205 - DY is double the size of Y
206 - DX, DY, DPROD all have the same type
207 - sum is the same size of DPROD or bigger
208 - sum has been recognized as a reduction variable.
210 This is equivalent to:
211 DPROD = X w* Y; #widen mult
212 sum_1 = DPROD w+ sum_0; #widen summation
214 DPROD = X w* Y; #widen mult
215 sum_1 = DPROD + sum_0; #summation
218 /* Starting from LAST_STMT, follow the defs of its uses in search
219 of the above pattern. */
221 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
222 return NULL;
224 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
226 /* Has been detected as widening-summation? */
228 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
229 type = gimple_expr_type (stmt);
230 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
231 return NULL;
232 oprnd0 = gimple_assign_rhs1 (stmt);
233 oprnd1 = gimple_assign_rhs2 (stmt);
234 half_type = TREE_TYPE (oprnd0);
236 else
238 gimple def_stmt;
240 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
241 return NULL;
242 oprnd0 = gimple_assign_rhs1 (last_stmt);
243 oprnd1 = gimple_assign_rhs2 (last_stmt);
244 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
245 || !types_compatible_p (TREE_TYPE (oprnd1), type))
246 return NULL;
247 stmt = last_stmt;
249 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
251 stmt = def_stmt;
252 oprnd0 = gimple_assign_rhs1 (stmt);
254 else
255 half_type = type;
258 /* So far so good. Since last_stmt was detected as a (summation) reduction,
259 we know that oprnd1 is the reduction variable (defined by a loop-header
260 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
261 Left to check that oprnd0 is defined by a (widen_)mult_expr */
262 if (TREE_CODE (oprnd0) != SSA_NAME)
263 return NULL;
265 prod_type = half_type;
266 stmt = SSA_NAME_DEF_STMT (oprnd0);
268 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
269 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
270 return NULL;
272 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
273 inside the loop (in case we are analyzing an outer-loop). */
274 if (!is_gimple_assign (stmt))
275 return NULL;
276 stmt_vinfo = vinfo_for_stmt (stmt);
277 gcc_assert (stmt_vinfo);
278 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
279 return NULL;
280 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
281 return NULL;
282 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
284 /* Has been detected as a widening multiplication? */
286 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
287 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
288 return NULL;
289 stmt_vinfo = vinfo_for_stmt (stmt);
290 gcc_assert (stmt_vinfo);
291 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
292 oprnd00 = gimple_assign_rhs1 (stmt);
293 oprnd01 = gimple_assign_rhs2 (stmt);
295 else
297 tree half_type0, half_type1;
298 gimple def_stmt;
299 tree oprnd0, oprnd1;
301 oprnd0 = gimple_assign_rhs1 (stmt);
302 oprnd1 = gimple_assign_rhs2 (stmt);
303 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
304 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
305 return NULL;
306 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
307 return NULL;
308 oprnd00 = gimple_assign_rhs1 (def_stmt);
309 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
310 return NULL;
311 oprnd01 = gimple_assign_rhs1 (def_stmt);
312 if (!types_compatible_p (half_type0, half_type1))
313 return NULL;
314 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
315 return NULL;
318 half_type = TREE_TYPE (oprnd00);
319 *type_in = half_type;
320 *type_out = type;
322 /* Pattern detected. Create a stmt to be used to replace the pattern: */
323 var = vect_recog_temp_ssa_var (type, NULL);
324 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
325 oprnd00, oprnd01, oprnd1);
327 if (vect_print_dump_info (REPORT_DETAILS))
329 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
330 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
333 /* We don't allow changing the order of the computation in the inner-loop
334 when doing outer-loop vectorization. */
335 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
337 return pattern_stmt;
341 /* Handle two cases of multiplication by a constant. The first one is when
342 the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
343 operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to
344 TYPE.
346 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
347 HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
348 TYPE), we can perform widen-mult from the intermediate type to TYPE and
349 replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */
351 static bool
352 vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
353 VEC (gimple, heap) **stmts, tree type,
354 tree *half_type, gimple def_stmt)
356 tree new_type, new_oprnd, tmp;
357 gimple new_stmt;
358 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
359 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
361 if (int_fits_type_p (const_oprnd, *half_type))
363 /* CONST_OPRND is a constant of HALF_TYPE. */
364 *oprnd = gimple_assign_rhs1 (def_stmt);
365 return true;
368 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
369 || !gimple_bb (def_stmt)
370 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
371 || !vinfo_for_stmt (def_stmt))
372 return false;
374 /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
375 a type 2 times bigger than HALF_TYPE. */
376 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
377 TYPE_UNSIGNED (type));
378 if (!int_fits_type_p (const_oprnd, new_type))
379 return false;
381 /* Use NEW_TYPE for widen_mult. */
382 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
384 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
385 /* Check if the already created pattern stmt is what we need. */
386 if (!is_gimple_assign (new_stmt)
387 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
388 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
389 return false;
391 *oprnd = gimple_assign_lhs (new_stmt);
393 else
395 /* Create a_T = (NEW_TYPE) a_t; */
396 *oprnd = gimple_assign_rhs1 (def_stmt);
397 tmp = create_tmp_var (new_type, NULL);
398 add_referenced_var (tmp);
399 new_oprnd = make_ssa_name (tmp, NULL);
400 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
401 NULL_TREE);
402 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
403 VEC_safe_push (gimple, heap, *stmts, def_stmt);
404 *oprnd = new_oprnd;
407 *half_type = new_type;
408 return true;
412 /* Function vect_recog_widen_mult_pattern
414 Try to find the following pattern:
416 type a_t, b_t;
417 TYPE a_T, b_T, prod_T;
419 S1 a_t = ;
420 S2 b_t = ;
421 S3 a_T = (TYPE) a_t;
422 S4 b_T = (TYPE) b_t;
423 S5 prod_T = a_T * b_T;
425 where type 'TYPE' is at least double the size of type 'type'.
427 Also detect unsgigned cases:
429 unsigned type a_t, b_t;
430 unsigned TYPE u_prod_T;
431 TYPE a_T, b_T, prod_T;
433 S1 a_t = ;
434 S2 b_t = ;
435 S3 a_T = (TYPE) a_t;
436 S4 b_T = (TYPE) b_t;
437 S5 prod_T = a_T * b_T;
438 S6 u_prod_T = (unsigned TYPE) prod_T;
440 and multiplication by constants:
442 type a_t;
443 TYPE a_T, prod_T;
445 S1 a_t = ;
446 S3 a_T = (TYPE) a_t;
447 S5 prod_T = a_T * CONST;
449 A special case of multiplication by constants is when 'TYPE' is 4 times
450 bigger than 'type', but CONST fits an intermediate type 2 times smaller
451 than 'TYPE'. In that case we create an additional pattern stmt for S3
452 to create a variable of the intermediate type, and perform widen-mult
453 on the intermediate type as well:
455 type a_t;
456 interm_type a_it;
457 TYPE a_T, prod_T, prod_T';
459 S1 a_t = ;
460 S3 a_T = (TYPE) a_t;
461 '--> a_it = (interm_type) a_t;
462 S5 prod_T = a_T * CONST;
463 '--> prod_T' = a_it w* CONST;
465 Input/Output:
467 * STMTS: Contains a stmt from which the pattern search begins. In the
468 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
469 is detected. In case of unsigned widen-mult, the original stmt (S5) is
470 replaced with S6 in STMTS. In case of multiplication by a constant
471 of an intermediate type (the last case above), STMTS also contains S3
472 (inserted before S5).
474 Output:
476 * TYPE_IN: The type of the input arguments to the pattern.
478 * TYPE_OUT: The type of the output of this pattern.
480 * Return value: A new stmt that will be used to replace the sequence of
481 stmts that constitute the pattern. In this case it will be:
482 WIDEN_MULT <a_t, b_t>
485 static gimple
486 vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
487 tree *type_in, tree *type_out)
489 gimple last_stmt = VEC_pop (gimple, *stmts);
490 gimple def_stmt0, def_stmt1;
491 tree oprnd0, oprnd1;
492 tree type, half_type0, half_type1;
493 gimple pattern_stmt;
494 tree vectype, vectype_out = NULL_TREE;
495 tree dummy;
496 tree var;
497 enum tree_code dummy_code;
498 int dummy_int;
499 VEC (tree, heap) *dummy_vec;
500 bool op0_ok, op1_ok;
502 if (!is_gimple_assign (last_stmt))
503 return NULL;
505 type = gimple_expr_type (last_stmt);
507 /* Starting from LAST_STMT, follow the defs of its uses in search
508 of the above pattern. */
510 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
511 return NULL;
513 oprnd0 = gimple_assign_rhs1 (last_stmt);
514 oprnd1 = gimple_assign_rhs2 (last_stmt);
515 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
516 || !types_compatible_p (TREE_TYPE (oprnd1), type))
517 return NULL;
519 /* Check argument 0. */
520 op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false);
521 /* Check argument 1. */
522 op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
524 /* In case of multiplication by a constant one of the operands may not match
525 the pattern, but not both. */
526 if (!op0_ok && !op1_ok)
527 return NULL;
529 if (op0_ok && op1_ok)
531 oprnd0 = gimple_assign_rhs1 (def_stmt0);
532 oprnd1 = gimple_assign_rhs1 (def_stmt1);
534 else if (!op0_ok)
536 if (TREE_CODE (oprnd0) == INTEGER_CST
537 && TREE_CODE (half_type1) == INTEGER_TYPE
538 && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1,
539 stmts, type,
540 &half_type1, def_stmt1))
541 half_type0 = half_type1;
542 else
543 return NULL;
545 else if (!op1_ok)
547 if (TREE_CODE (oprnd1) == INTEGER_CST
548 && TREE_CODE (half_type0) == INTEGER_TYPE
549 && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0,
550 stmts, type,
551 &half_type0, def_stmt0))
552 half_type1 = half_type0;
553 else
554 return NULL;
557 /* Handle unsigned case. Look for
558 S6 u_prod_T = (unsigned TYPE) prod_T;
559 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
560 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
562 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
563 imm_use_iterator imm_iter;
564 use_operand_p use_p;
565 int nuses = 0;
566 gimple use_stmt = NULL;
567 tree use_type;
569 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
570 return NULL;
572 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
574 if (is_gimple_debug (USE_STMT (use_p)))
575 continue;
576 use_stmt = USE_STMT (use_p);
577 nuses++;
580 if (nuses != 1 || !is_gimple_assign (use_stmt)
581 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
582 return NULL;
584 use_lhs = gimple_assign_lhs (use_stmt);
585 use_type = TREE_TYPE (use_lhs);
586 if (!INTEGRAL_TYPE_P (use_type)
587 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
588 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
589 return NULL;
591 type = use_type;
592 last_stmt = use_stmt;
595 if (!types_compatible_p (half_type0, half_type1))
596 return NULL;
598 /* Pattern detected. */
599 if (vect_print_dump_info (REPORT_DETAILS))
600 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
602 /* Check target support */
603 vectype = get_vectype_for_scalar_type (half_type0);
604 vectype_out = get_vectype_for_scalar_type (type);
605 if (!vectype
606 || !vectype_out
607 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
608 vectype_out, vectype,
609 &dummy, &dummy, &dummy_code,
610 &dummy_code, &dummy_int, &dummy_vec))
611 return NULL;
613 *type_in = vectype;
614 *type_out = vectype_out;
616 /* Pattern supported. Create a stmt to be used to replace the pattern: */
617 var = vect_recog_temp_ssa_var (type, NULL);
618 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
619 oprnd1);
621 if (vect_print_dump_info (REPORT_DETAILS))
622 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
624 VEC_safe_push (gimple, heap, *stmts, last_stmt);
625 return pattern_stmt;
629 /* Function vect_recog_pow_pattern
631 Try to find the following pattern:
633 x = POW (y, N);
635 with POW being one of pow, powf, powi, powif and N being
636 either 2 or 0.5.
638 Input:
640 * LAST_STMT: A stmt from which the pattern search begins.
642 Output:
644 * TYPE_IN: The type of the input arguments to the pattern.
646 * TYPE_OUT: The type of the output of this pattern.
648 * Return value: A new stmt that will be used to replace the sequence of
649 stmts that constitute the pattern. In this case it will be:
650 x = x * x
652 x = sqrt (x)
655 static gimple
656 vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
657 tree *type_out)
659 gimple last_stmt = VEC_index (gimple, *stmts, 0);
660 tree fn, base, exp = NULL;
661 gimple stmt;
662 tree var;
664 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
665 return NULL;
667 fn = gimple_call_fndecl (last_stmt);
668 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
669 return NULL;
671 switch (DECL_FUNCTION_CODE (fn))
673 case BUILT_IN_POWIF:
674 case BUILT_IN_POWI:
675 case BUILT_IN_POWF:
676 case BUILT_IN_POW:
677 base = gimple_call_arg (last_stmt, 0);
678 exp = gimple_call_arg (last_stmt, 1);
679 if (TREE_CODE (exp) != REAL_CST
680 && TREE_CODE (exp) != INTEGER_CST)
681 return NULL;
682 break;
684 default:
685 return NULL;
688 /* We now have a pow or powi builtin function call with a constant
689 exponent. */
691 *type_out = NULL_TREE;
693 /* Catch squaring. */
694 if ((host_integerp (exp, 0)
695 && tree_low_cst (exp, 0) == 2)
696 || (TREE_CODE (exp) == REAL_CST
697 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
699 *type_in = TREE_TYPE (base);
701 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
702 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
703 return stmt;
706 /* Catch square root. */
707 if (TREE_CODE (exp) == REAL_CST
708 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
710 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
711 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
712 if (*type_in)
714 gimple stmt = gimple_build_call (newfn, 1, base);
715 if (vectorizable_function (stmt, *type_in, *type_in)
716 != NULL_TREE)
718 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
719 gimple_call_set_lhs (stmt, var);
720 return stmt;
725 return NULL;
729 /* Function vect_recog_widen_sum_pattern
731 Try to find the following pattern:
733 type x_t;
734 TYPE x_T, sum = init;
735 loop:
736 sum_0 = phi <init, sum_1>
737 S1 x_t = *p;
738 S2 x_T = (TYPE) x_t;
739 S3 sum_1 = x_T + sum_0;
741 where type 'TYPE' is at least double the size of type 'type', i.e - we're
742 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
743 a special case of a reduction computation.
745 Input:
747 * LAST_STMT: A stmt from which the pattern search begins. In the example,
748 when this function is called with S3, the pattern {S2,S3} will be detected.
750 Output:
752 * TYPE_IN: The type of the input arguments to the pattern.
754 * TYPE_OUT: The type of the output of this pattern.
756 * Return value: A new stmt that will be used to replace the sequence of
757 stmts that constitute the pattern. In this case it will be:
758 WIDEN_SUM <x_t, sum_0>
760 Note: The widening-sum idiom is a widening reduction pattern that is
761 vectorized without preserving all the intermediate results. It
762 produces only N/2 (widened) results (by summing up pairs of
763 intermediate results) rather than all N results. Therefore, we
764 cannot allow this pattern when we want to get all the results and in
765 the correct order (as is the case when this computation is in an
766 inner-loop nested in an outer-loop that us being vectorized). */
768 static gimple
769 vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
770 tree *type_out)
772 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
773 tree oprnd0, oprnd1;
774 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
775 tree type, half_type;
776 gimple pattern_stmt;
777 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
778 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
779 tree var;
781 if (!is_gimple_assign (last_stmt))
782 return NULL;
784 type = gimple_expr_type (last_stmt);
786 /* Look for the following pattern
787 DX = (TYPE) X;
788 sum_1 = DX + sum_0;
789 In which DX is at least double the size of X, and sum_1 has been
790 recognized as a reduction variable.
793 /* Starting from LAST_STMT, follow the defs of its uses in search
794 of the above pattern. */
796 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
797 return NULL;
799 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
800 return NULL;
802 oprnd0 = gimple_assign_rhs1 (last_stmt);
803 oprnd1 = gimple_assign_rhs2 (last_stmt);
804 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
805 || !types_compatible_p (TREE_TYPE (oprnd1), type))
806 return NULL;
808 /* So far so good. Since last_stmt was detected as a (summation) reduction,
809 we know that oprnd1 is the reduction variable (defined by a loop-header
810 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
811 Left to check that oprnd0 is defined by a cast from type 'type' to type
812 'TYPE'. */
814 if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
815 return NULL;
817 oprnd0 = gimple_assign_rhs1 (stmt);
818 *type_in = half_type;
819 *type_out = type;
821 /* Pattern detected. Create a stmt to be used to replace the pattern: */
822 var = vect_recog_temp_ssa_var (type, NULL);
823 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
824 oprnd0, oprnd1);
826 if (vect_print_dump_info (REPORT_DETAILS))
828 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
829 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
832 /* We don't allow changing the order of the computation in the inner-loop
833 when doing outer-loop vectorization. */
834 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
836 return pattern_stmt;
840 /* Return TRUE if the operation in STMT can be performed on a smaller type.
842 Input:
843 STMT - a statement to check.
844 DEF - we support operations with two operands, one of which is constant.
845 The other operand can be defined by a demotion operation, or by a
846 previous statement in a sequence of over-promoted operations. In the
847 later case DEF is used to replace that operand. (It is defined by a
848 pattern statement we created for the previous statement in the
849 sequence).
851 Input/output:
852 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
853 NULL, it's the type of DEF.
854 STMTS - additional pattern statements. If a pattern statement (type
855 conversion) is created in this function, its original statement is
856 added to STMTS.
858 Output:
859 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
860 operands to use in the new pattern statement for STMT (will be created
861 in vect_recog_over_widening_pattern ()).
862 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
863 statements for STMT: the first one is a type promotion and the second
864 one is the operation itself. We return the type promotion statement
865 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
866 the second pattern statement. */
868 static bool
869 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
870 tree *op0, tree *op1, gimple *new_def_stmt,
871 VEC (gimple, heap) **stmts)
873 enum tree_code code;
874 tree const_oprnd, oprnd;
875 tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
876 gimple def_stmt, new_stmt;
877 bool first = false;
878 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
879 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
881 *new_def_stmt = NULL;
883 if (!is_gimple_assign (stmt))
884 return false;
886 code = gimple_assign_rhs_code (stmt);
887 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
888 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
889 return false;
891 oprnd = gimple_assign_rhs1 (stmt);
892 const_oprnd = gimple_assign_rhs2 (stmt);
893 type = gimple_expr_type (stmt);
895 if (TREE_CODE (oprnd) != SSA_NAME
896 || TREE_CODE (const_oprnd) != INTEGER_CST)
897 return false;
899 /* If we are in the middle of a sequence, we use DEF from a previous
900 statement. Otherwise, OPRND has to be a result of type promotion. */
901 if (*new_type)
903 half_type = *new_type;
904 oprnd = def;
906 else
908 first = true;
909 if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
910 || !gimple_bb (def_stmt)
911 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
912 || !vinfo_for_stmt (def_stmt))
913 return false;
916 /* Can we perform the operation on a smaller type? */
917 switch (code)
919 case BIT_IOR_EXPR:
920 case BIT_XOR_EXPR:
921 case BIT_AND_EXPR:
922 if (!int_fits_type_p (const_oprnd, half_type))
924 /* HALF_TYPE is not enough. Try a bigger type if possible. */
925 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
926 return false;
928 interm_type = build_nonstandard_integer_type (
929 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
930 if (!int_fits_type_p (const_oprnd, interm_type))
931 return false;
934 break;
936 case LSHIFT_EXPR:
937 /* Try intermediate type - HALF_TYPE is not enough for sure. */
938 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
939 return false;
941 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
942 (e.g., if the original value was char, the shift amount is at most 8
943 if we want to use short). */
944 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
945 return false;
947 interm_type = build_nonstandard_integer_type (
948 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
950 if (!vect_supportable_shift (code, interm_type))
951 return false;
953 break;
955 case RSHIFT_EXPR:
956 if (vect_supportable_shift (code, half_type))
957 break;
959 /* Try intermediate type - HALF_TYPE is not supported. */
960 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
961 return false;
963 interm_type = build_nonstandard_integer_type (
964 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
966 if (!vect_supportable_shift (code, interm_type))
967 return false;
969 break;
971 default:
972 gcc_unreachable ();
975 /* There are four possible cases:
976 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
977 the first statement in the sequence)
978 a. The original, HALF_TYPE, is not enough - we replace the promotion
979 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
980 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
981 promotion.
982 2. OPRND is defined by a pattern statement we created.
983 a. Its type is not sufficient for the operation, we create a new stmt:
984 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
985 this statement in NEW_DEF_STMT, and it is later put in
986 STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
987 b. OPRND is good to use in the new statement. */
988 if (first)
990 if (interm_type)
992 /* Replace the original type conversion HALF_TYPE->TYPE with
993 HALF_TYPE->INTERM_TYPE. */
994 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
996 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
997 /* Check if the already created pattern stmt is what we need. */
998 if (!is_gimple_assign (new_stmt)
999 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1000 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1001 return false;
1003 oprnd = gimple_assign_lhs (new_stmt);
1005 else
1007 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1008 oprnd = gimple_assign_rhs1 (def_stmt);
1009 tmp = create_tmp_reg (interm_type, NULL);
1010 add_referenced_var (tmp);
1011 new_oprnd = make_ssa_name (tmp, NULL);
1012 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1013 oprnd, NULL_TREE);
1014 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1015 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1016 oprnd = new_oprnd;
1019 else
1021 /* Retrieve the operand before the type promotion. */
1022 oprnd = gimple_assign_rhs1 (def_stmt);
1025 else
1027 if (interm_type)
1029 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1030 tmp = create_tmp_reg (interm_type, NULL);
1031 add_referenced_var (tmp);
1032 new_oprnd = make_ssa_name (tmp, NULL);
1033 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1034 oprnd, NULL_TREE);
1035 oprnd = new_oprnd;
1036 *new_def_stmt = new_stmt;
1039 /* Otherwise, OPRND is already set. */
1042 if (interm_type)
1043 *new_type = interm_type;
1044 else
1045 *new_type = half_type;
1047 *op0 = oprnd;
1048 *op1 = fold_convert (*new_type, const_oprnd);
1050 return true;
1054 /* Try to find a statement or a sequence of statements that can be performed
1055 on a smaller type:
1057 type x_t;
1058 TYPE x_T, res0_T, res1_T;
1059 loop:
1060 S1 x_t = *p;
1061 S2 x_T = (TYPE) x_t;
1062 S3 res0_T = op (x_T, C0);
1063 S4 res1_T = op (res0_T, C1);
1064 S5 ... = () res1_T; - type demotion
1066 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1067 constants.
1068 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1069 be 'type' or some intermediate type. For now, we expect S5 to be a type
1070 demotion operation. We also check that S3 and S4 have only one use.
1074 static gimple
1075 vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
1076 tree *type_in, tree *type_out)
1078 gimple stmt = VEC_pop (gimple, *stmts);
1079 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1080 tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
1081 imm_use_iterator imm_iter;
1082 use_operand_p use_p;
1083 int nuses = 0;
1084 tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
1085 bool first;
1086 struct loop *loop = (gimple_bb (stmt))->loop_father;
1088 first = true;
1089 while (1)
1091 if (!vinfo_for_stmt (stmt)
1092 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1093 return NULL;
1095 new_def_stmt = NULL;
1096 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1097 &op0, &op1, &new_def_stmt,
1098 stmts))
1100 if (first)
1101 return NULL;
1102 else
1103 break;
1106 /* STMT can be performed on a smaller type. Check its uses. */
1107 lhs = gimple_assign_lhs (stmt);
1108 nuses = 0;
1109 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1111 if (is_gimple_debug (USE_STMT (use_p)))
1112 continue;
1113 use_stmt = USE_STMT (use_p);
1114 nuses++;
1117 if (nuses != 1 || !is_gimple_assign (use_stmt)
1118 || !gimple_bb (use_stmt)
1119 || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1120 return NULL;
1122 /* Create pattern statement for STMT. */
1123 vectype = get_vectype_for_scalar_type (new_type);
1124 if (!vectype)
1125 return NULL;
1127 /* We want to collect all the statements for which we create pattern
1128 statetments, except for the case when the last statement in the
1129 sequence doesn't have a corresponding pattern statement. In such
1130 case we associate the last pattern statement with the last statement
1131 in the sequence. Therefore, we only add an original statetement to
1132 the list if we know that it is not the last. */
1133 if (prev_stmt)
1134 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1136 var = vect_recog_temp_ssa_var (new_type, NULL);
1137 pattern_stmt
1138 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1139 op0, op1);
1140 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1141 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;
1143 if (vect_print_dump_info (REPORT_DETAILS))
1145 fprintf (vect_dump, "created pattern stmt: ");
1146 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1149 prev_stmt = stmt;
1150 stmt = use_stmt;
1152 first = false;
1155 /* We got a sequence. We expect it to end with a type demotion operation.
1156 Otherwise, we quit (for now). There are three possible cases: the
1157 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1158 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1159 NEW_TYPE differs (we create a new conversion statement). */
1160 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1162 use_lhs = gimple_assign_lhs (use_stmt);
1163 use_type = TREE_TYPE (use_lhs);
1164 /* Support only type promotion or signedess change. */
1165 if (!INTEGRAL_TYPE_P (use_type)
1166 || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1167 return NULL;
1169 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1170 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1172 /* Create NEW_TYPE->USE_TYPE conversion. */
1173 tmp = create_tmp_reg (use_type, NULL);
1174 add_referenced_var (tmp);
1175 new_oprnd = make_ssa_name (tmp, NULL);
1176 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1177 var, NULL_TREE);
1178 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1180 *type_in = get_vectype_for_scalar_type (new_type);
1181 *type_out = get_vectype_for_scalar_type (use_type);
1183 /* We created a pattern statement for the last statement in the
1184 sequence, so we don't need to associate it with the pattern
1185 statement created for PREV_STMT. Therefore, we add PREV_STMT
1186 to the list in order to mark it later in vect_pattern_recog_1. */
1187 if (prev_stmt)
1188 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1190 else
1192 if (prev_stmt)
1193 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
1194 = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));
1196 *type_in = vectype;
1197 *type_out = NULL_TREE;
1200 VEC_safe_push (gimple, heap, *stmts, use_stmt);
1202 else
1203 /* TODO: support general case, create a conversion to the correct type. */
1204 return NULL;
1206 /* Pattern detected. */
1207 if (vect_print_dump_info (REPORT_DETAILS))
1209 fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1210 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1213 return pattern_stmt;
1217 /* Function vect_recog_mixed_size_cond_pattern
1219 Try to find the following pattern:
1221 type x_t, y_t;
1222 TYPE a_T, b_T, c_T;
1223 loop:
1224 S1 a_T = x_t CMP y_t ? b_T : c_T;
1226 where type 'TYPE' is an integral type which has different size
1227 from 'type'. b_T and c_T are constants and if 'TYPE' is wider
1228 than 'type', the constants need to fit into an integer type
1229 with the same width as 'type'.
1231 Input:
1233 * LAST_STMT: A stmt from which the pattern search begins.
1235 Output:
1237 * TYPE_IN: The type of the input arguments to the pattern.
1239 * TYPE_OUT: The type of the output of this pattern.
1241 * Return value: A new stmt that will be used to replace the pattern.
1242 Additionally a def_stmt is added.
1244 a_it = x_t CMP y_t ? b_it : c_it;
1245 a_T = (TYPE) a_it; */
1247 static gimple
1248 vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1249 tree *type_out)
1251 gimple last_stmt = VEC_index (gimple, *stmts, 0);
1252 tree cond_expr, then_clause, else_clause;
1253 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1254 tree type, vectype, comp_vectype, itype, vecitype;
1255 enum machine_mode cmpmode;
1256 gimple pattern_stmt, def_stmt;
1257 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1259 if (!is_gimple_assign (last_stmt)
1260 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1261 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1262 return NULL;
1264 cond_expr = gimple_assign_rhs1 (last_stmt);
1265 then_clause = gimple_assign_rhs2 (last_stmt);
1266 else_clause = gimple_assign_rhs3 (last_stmt);
1268 if (TREE_CODE (then_clause) != INTEGER_CST
1269 || TREE_CODE (else_clause) != INTEGER_CST)
1270 return NULL;
1272 if (!COMPARISON_CLASS_P (cond_expr))
1273 return NULL;
1275 comp_vectype
1276 = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
1277 if (comp_vectype == NULL_TREE)
1278 return NULL;
1280 type = gimple_expr_type (last_stmt);
1281 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
1283 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1284 return NULL;
1286 vectype = get_vectype_for_scalar_type (type);
1287 if (vectype == NULL_TREE)
1288 return NULL;
1290 if (expand_vec_cond_expr_p (vectype, comp_vectype))
1291 return NULL;
1293 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1294 TYPE_UNSIGNED (type));
1295 if (itype == NULL_TREE
1296 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1297 return NULL;
1299 vecitype = get_vectype_for_scalar_type (itype);
1300 if (vecitype == NULL_TREE)
1301 return NULL;
1303 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
1304 return NULL;
1306 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1308 if (!int_fits_type_p (then_clause, itype)
1309 || !int_fits_type_p (else_clause, itype))
1310 return NULL;
1313 def_stmt
1314 = gimple_build_assign_with_ops3 (COND_EXPR,
1315 vect_recog_temp_ssa_var (itype, NULL),
1316 unshare_expr (cond_expr),
1317 fold_convert (itype, then_clause),
1318 fold_convert (itype, else_clause));
1319 pattern_stmt
1320 = gimple_build_assign_with_ops (NOP_EXPR,
1321 vect_recog_temp_ssa_var (type, NULL),
1322 gimple_assign_lhs (def_stmt), NULL_TREE);
1324 STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
1325 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1326 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1327 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
1328 *type_in = vecitype;
1329 *type_out = vectype;
1331 return pattern_stmt;
1335 /* Mark statements that are involved in a pattern. */
1337 static inline void
1338 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
1339 tree pattern_vectype)
1341 stmt_vec_info pattern_stmt_info, def_stmt_info;
1342 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1343 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
1344 gimple def_stmt;
1346 set_vinfo_for_stmt (pattern_stmt,
1347 new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
1348 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1349 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
1351 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
1352 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
1353 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1354 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
1355 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
1356 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
1357 STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
1358 = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
1359 if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
1361 def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
1362 def_stmt_info = vinfo_for_stmt (def_stmt);
1363 if (def_stmt_info == NULL)
1365 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1366 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1368 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
1369 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
1370 STMT_VINFO_DEF_TYPE (def_stmt_info)
1371 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1372 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
1373 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
1377 /* Function vect_pattern_recog_1
1379 Input:
1380 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
1381 computation pattern.
1382 STMT: A stmt from which the pattern search should start.
1384 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
1385 expression that computes the same functionality and can be used to
1386 replace the sequence of stmts that are involved in the pattern.
1388 Output:
1389 This function checks if the expression returned by PATTERN_RECOG_FUNC is
1390 supported in vector form by the target. We use 'TYPE_IN' to obtain the
1391 relevant vector type. If 'TYPE_IN' is already a vector type, then this
1392 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
1393 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
1394 to the available target pattern.
1396 This function also does some bookkeeping, as explained in the documentation
1397 for vect_recog_pattern. */
1399 static void
1400 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
1401 gimple_stmt_iterator si,
1402 VEC (gimple, heap) **stmts_to_replace)
1404 gimple stmt = gsi_stmt (si), pattern_stmt;
1405 stmt_vec_info stmt_info;
1406 loop_vec_info loop_vinfo;
1407 tree pattern_vectype;
1408 tree type_in, type_out;
1409 enum tree_code code;
1410 int i;
1411 gimple next;
1413 VEC_truncate (gimple, *stmts_to_replace, 0);
1414 VEC_quick_push (gimple, *stmts_to_replace, stmt);
1415 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
1416 if (!pattern_stmt)
1417 return;
1419 stmt = VEC_last (gimple, *stmts_to_replace);
1420 stmt_info = vinfo_for_stmt (stmt);
1421 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1423 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
1425 /* No need to check target support (already checked by the pattern
1426 recognition function). */
1427 if (type_out)
1428 gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out)));
1429 pattern_vectype = type_out ? type_out : type_in;
1431 else
1433 enum machine_mode vec_mode;
1434 enum insn_code icode;
1435 optab optab;
1437 /* Check target support */
1438 type_in = get_vectype_for_scalar_type (type_in);
1439 if (!type_in)
1440 return;
1441 if (type_out)
1442 type_out = get_vectype_for_scalar_type (type_out);
1443 else
1444 type_out = type_in;
1445 if (!type_out)
1446 return;
1447 pattern_vectype = type_out;
1449 if (is_gimple_assign (pattern_stmt))
1450 code = gimple_assign_rhs_code (pattern_stmt);
1451 else
1453 gcc_assert (is_gimple_call (pattern_stmt));
1454 code = CALL_EXPR;
1457 optab = optab_for_tree_code (code, type_in, optab_default);
1458 vec_mode = TYPE_MODE (type_in);
1459 if (!optab
1460 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
1461 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
1462 return;
1465 /* Found a vectorizable pattern. */
1466 if (vect_print_dump_info (REPORT_DETAILS))
1468 fprintf (vect_dump, "pattern recognized: ");
1469 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1472 /* Mark the stmts that are involved in the pattern. */
1473 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
1475 /* Patterns cannot be vectorized using SLP, because they change the order of
1476 computation. */
1477 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
1478 if (next == stmt)
1479 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
1481 /* It is possible that additional pattern stmts are created and inserted in
1482 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
1483 relevant statements. */
1484 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
1485 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
1486 i++)
1488 stmt_info = vinfo_for_stmt (stmt);
1489 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1490 if (vect_print_dump_info (REPORT_DETAILS))
1492 fprintf (vect_dump, "additional pattern stmt: ");
1493 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1496 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
1501 /* Function vect_pattern_recog
1503 Input:
1504 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
1505 computation idioms.
1507 Output - for each computation idiom that is detected we create a new stmt
1508 that provides the same functionality and that can be vectorized. We
1509 also record some information in the struct_stmt_info of the relevant
1510 stmts, as explained below:
1512 At the entry to this function we have the following stmts, with the
1513 following initial value in the STMT_VINFO fields:
1515 stmt in_pattern_p related_stmt vec_stmt
1516 S1: a_i = .... - - -
1517 S2: a_2 = ..use(a_i).. - - -
1518 S3: a_1 = ..use(a_2).. - - -
1519 S4: a_0 = ..use(a_1).. - - -
1520 S5: ... = ..use(a_0).. - - -
1522 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
1523 represented by a single stmt. We then:
1524 - create a new stmt S6 equivalent to the pattern (the stmt is not
1525 inserted into the code)
1526 - fill in the STMT_VINFO fields as follows:
1528 in_pattern_p related_stmt vec_stmt
1529 S1: a_i = .... - - -
1530 S2: a_2 = ..use(a_i).. - - -
1531 S3: a_1 = ..use(a_2).. - - -
1532 S4: a_0 = ..use(a_1).. true S6 -
1533 '---> S6: a_new = .... - S4 -
1534 S5: ... = ..use(a_0).. - - -
1536 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
1537 to each other through the RELATED_STMT field).
1539 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
1540 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
1541 remain irrelevant unless used by stmts other than S4.
1543 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
1544 (because they are marked as irrelevant). It will vectorize S6, and record
1545 a pointer to the new vector stmt VS6 from S6 (as usual).
1546 S4 will be skipped, and S5 will be vectorized as usual:
1548 in_pattern_p related_stmt vec_stmt
1549 S1: a_i = .... - - -
1550 S2: a_2 = ..use(a_i).. - - -
1551 S3: a_1 = ..use(a_2).. - - -
1552 > VS6: va_new = .... - - -
1553 S4: a_0 = ..use(a_1).. true S6 VS6
1554 '---> S6: a_new = .... - S4 VS6
1555 > VS5: ... = ..vuse(va_new).. - - -
1556 S5: ... = ..use(a_0).. - - -
1558 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
1559 elsewhere), and we'll end up with:
1561 VS6: va_new = ....
1562 VS5: ... = ..vuse(va_new)..
1564 In case of more than one pattern statements, e.g., widen-mult with
1565 intermediate type:
1567 S1 a_t = ;
1568 S2 a_T = (TYPE) a_t;
1569 '--> S3: a_it = (interm_type) a_t;
1570 S4 prod_T = a_T * CONST;
1571 '--> S5: prod_T' = a_it w* CONST;
1573 there may be other users of a_T outside the pattern. In that case S2 will
1574 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
1575 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
1576 be recorded in S3. */
1578 void
1579 vect_pattern_recog (loop_vec_info loop_vinfo)
1581 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1582 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1583 unsigned int nbbs = loop->num_nodes;
1584 gimple_stmt_iterator si;
1585 unsigned int i, j;
1586 vect_recog_func_ptr vect_recog_func;
1587 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
1589 if (vect_print_dump_info (REPORT_DETAILS))
1590 fprintf (vect_dump, "=== vect_pattern_recog ===");
1592 /* Scan through the loop stmts, applying the pattern recognition
1593 functions starting at each stmt visited: */
1594 for (i = 0; i < nbbs; i++)
1596 basic_block bb = bbs[i];
1597 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1599 /* Scan over all generic vect_recog_xxx_pattern functions. */
1600 for (j = 0; j < NUM_PATTERNS; j++)
1602 vect_recog_func = vect_vect_recog_func_ptrs[j];
1603 vect_pattern_recog_1 (vect_recog_func, si,
1604 &stmts_to_replace);
1609 VEC_free (gimple, heap, stmts_to_replace);