Fix typo in my last change
[official-gcc.git] / gcc / tree-vect-patterns.c
blob44a37b91e387aa39029fea2dbe485ecb0b8a282b
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-data-ref.h"
38 #include "tree-vectorizer.h"
39 #include "recog.h"
40 #include "diagnostic-core.h"
42 /* Pattern recognition functions */
43 static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
44 tree *);
45 static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
46 tree *);
47 static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
48 tree *);
49 static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
50 static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
51 tree *);
52 static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
53 tree *, tree *);
54 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
55 vect_recog_widen_mult_pattern,
56 vect_recog_widen_sum_pattern,
57 vect_recog_dot_prod_pattern,
58 vect_recog_pow_pattern,
59 vect_recog_over_widening_pattern,
60 vect_recog_mixed_size_cond_pattern};
63 /* Function widened_name_p
65 Check whether NAME, an ssa-name used in USE_STMT,
66 is a result of a type-promotion, such that:
67 DEF_STMT: NAME = NOP (name0)
68 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
69 If CHECK_SIGN is TRUE, check that either both types are signed or both are
70 unsigned. */
72 static bool
73 widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
74 bool check_sign)
76 tree dummy;
77 gimple dummy_gimple;
78 loop_vec_info loop_vinfo;
79 stmt_vec_info stmt_vinfo;
80 tree type = TREE_TYPE (name);
81 tree oprnd0;
82 enum vect_def_type dt;
83 tree def;
85 stmt_vinfo = vinfo_for_stmt (use_stmt);
86 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
88 if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
89 return false;
91 if (dt != vect_internal_def
92 && dt != vect_external_def && dt != vect_constant_def)
93 return false;
95 if (! *def_stmt)
96 return false;
98 if (!is_gimple_assign (*def_stmt))
99 return false;
101 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
102 return false;
104 oprnd0 = gimple_assign_rhs1 (*def_stmt);
106 *half_type = TREE_TYPE (oprnd0);
107 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
108 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
109 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
110 return false;
112 if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
113 &dt))
114 return false;
116 return true;
119 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
120 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
122 static tree
123 vect_recog_temp_ssa_var (tree type, gimple stmt)
125 tree var = create_tmp_var (type, "patt");
127 add_referenced_var (var);
128 var = make_ssa_name (var, stmt);
129 return var;
132 /* Function vect_recog_dot_prod_pattern
134 Try to find the following pattern:
136 type x_t, y_t;
137 TYPE1 prod;
138 TYPE2 sum = init;
139 loop:
140 sum_0 = phi <init, sum_1>
141 S1 x_t = ...
142 S2 y_t = ...
143 S3 x_T = (TYPE1) x_t;
144 S4 y_T = (TYPE1) y_t;
145 S5 prod = x_T * y_T;
146 [S6 prod = (TYPE2) prod; #optional]
147 S7 sum_1 = prod + sum_0;
149 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
150 same size of 'TYPE1' or bigger. This is a special case of a reduction
151 computation.
153 Input:
155 * STMTS: Contains a stmt from which the pattern search begins. In the
156 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
157 will be detected.
159 Output:
161 * TYPE_IN: The type of the input arguments to the pattern.
163 * TYPE_OUT: The type of the output of this pattern.
165 * Return value: A new stmt that will be used to replace the sequence of
166 stmts that constitute the pattern. In this case it will be:
167 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
169 Note: The dot-prod idiom is a widening reduction pattern that is
170 vectorized without preserving all the intermediate results. It
171 produces only N/2 (widened) results (by summing up pairs of
172 intermediate results) rather than all N results. Therefore, we
173 cannot allow this pattern when we want to get all the results and in
174 the correct order (as is the case when this computation is in an
175 inner-loop nested in an outer-loop that us being vectorized). */
177 static gimple
178 vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
179 tree *type_out)
181 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
182 tree oprnd0, oprnd1;
183 tree oprnd00, oprnd01;
184 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
185 tree type, half_type;
186 gimple pattern_stmt;
187 tree prod_type;
188 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
189 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
190 tree var;
192 if (!is_gimple_assign (last_stmt))
193 return NULL;
195 type = gimple_expr_type (last_stmt);
197 /* Look for the following pattern
198 DX = (TYPE1) X;
199 DY = (TYPE1) Y;
200 DPROD = DX * DY;
201 DDPROD = (TYPE2) DPROD;
202 sum_1 = DDPROD + sum_0;
203 In which
204 - DX is double the size of X
205 - DY is double the size of Y
206 - DX, DY, DPROD all have the same type
207 - sum is the same size of DPROD or bigger
208 - sum has been recognized as a reduction variable.
210 This is equivalent to:
211 DPROD = X w* Y; #widen mult
212 sum_1 = DPROD w+ sum_0; #widen summation
214 DPROD = X w* Y; #widen mult
215 sum_1 = DPROD + sum_0; #summation
218 /* Starting from LAST_STMT, follow the defs of its uses in search
219 of the above pattern. */
221 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
222 return NULL;
224 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
226 /* Has been detected as widening-summation? */
228 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
229 type = gimple_expr_type (stmt);
230 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
231 return NULL;
232 oprnd0 = gimple_assign_rhs1 (stmt);
233 oprnd1 = gimple_assign_rhs2 (stmt);
234 half_type = TREE_TYPE (oprnd0);
236 else
238 gimple def_stmt;
240 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
241 return NULL;
242 oprnd0 = gimple_assign_rhs1 (last_stmt);
243 oprnd1 = gimple_assign_rhs2 (last_stmt);
244 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
245 || !types_compatible_p (TREE_TYPE (oprnd1), type))
246 return NULL;
247 stmt = last_stmt;
249 if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
251 stmt = def_stmt;
252 oprnd0 = gimple_assign_rhs1 (stmt);
254 else
255 half_type = type;
258 /* So far so good. Since last_stmt was detected as a (summation) reduction,
259 we know that oprnd1 is the reduction variable (defined by a loop-header
260 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
261 Left to check that oprnd0 is defined by a (widen_)mult_expr */
262 if (TREE_CODE (oprnd0) != SSA_NAME)
263 return NULL;
265 prod_type = half_type;
266 stmt = SSA_NAME_DEF_STMT (oprnd0);
268 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
269 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
270 return NULL;
272 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
273 inside the loop (in case we are analyzing an outer-loop). */
274 if (!is_gimple_assign (stmt))
275 return NULL;
276 stmt_vinfo = vinfo_for_stmt (stmt);
277 gcc_assert (stmt_vinfo);
278 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
279 return NULL;
280 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
281 return NULL;
282 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
284 /* Has been detected as a widening multiplication? */
286 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
287 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
288 return NULL;
289 stmt_vinfo = vinfo_for_stmt (stmt);
290 gcc_assert (stmt_vinfo);
291 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
292 oprnd00 = gimple_assign_rhs1 (stmt);
293 oprnd01 = gimple_assign_rhs2 (stmt);
295 else
297 tree half_type0, half_type1;
298 gimple def_stmt;
299 tree oprnd0, oprnd1;
301 oprnd0 = gimple_assign_rhs1 (stmt);
302 oprnd1 = gimple_assign_rhs2 (stmt);
303 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
304 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
305 return NULL;
306 if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
307 return NULL;
308 oprnd00 = gimple_assign_rhs1 (def_stmt);
309 if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
310 return NULL;
311 oprnd01 = gimple_assign_rhs1 (def_stmt);
312 if (!types_compatible_p (half_type0, half_type1))
313 return NULL;
314 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
315 return NULL;
318 half_type = TREE_TYPE (oprnd00);
319 *type_in = half_type;
320 *type_out = type;
322 /* Pattern detected. Create a stmt to be used to replace the pattern: */
323 var = vect_recog_temp_ssa_var (type, NULL);
324 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
325 oprnd00, oprnd01, oprnd1);
327 if (vect_print_dump_info (REPORT_DETAILS))
329 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
330 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
333 /* We don't allow changing the order of the computation in the inner-loop
334 when doing outer-loop vectorization. */
335 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
337 return pattern_stmt;
341 /* Handle two cases of multiplication by a constant. The first one is when
342 the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
343 operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to
344 TYPE.
346 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
347 HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
348 TYPE), we can perform widen-mult from the intermediate type to TYPE and
349 replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */
351 static bool
352 vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
353 VEC (gimple, heap) **stmts, tree type,
354 tree *half_type, gimple def_stmt)
356 tree new_type, new_oprnd, tmp;
357 gimple new_stmt;
358 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
359 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
361 if (int_fits_type_p (const_oprnd, *half_type))
363 /* CONST_OPRND is a constant of HALF_TYPE. */
364 *oprnd = gimple_assign_rhs1 (def_stmt);
365 return true;
368 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
369 || !gimple_bb (def_stmt)
370 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
371 || !vinfo_for_stmt (def_stmt))
372 return false;
374 /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
375 a type 2 times bigger than HALF_TYPE. */
376 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
377 TYPE_UNSIGNED (type));
378 if (!int_fits_type_p (const_oprnd, new_type))
379 return false;
381 /* Use NEW_TYPE for widen_mult. */
382 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
384 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
385 /* Check if the already created pattern stmt is what we need. */
386 if (!is_gimple_assign (new_stmt)
387 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
388 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
389 return false;
391 VEC_safe_push (gimple, heap, *stmts, def_stmt);
392 *oprnd = gimple_assign_lhs (new_stmt);
394 else
396 /* Create a_T = (NEW_TYPE) a_t; */
397 *oprnd = gimple_assign_rhs1 (def_stmt);
398 tmp = create_tmp_var (new_type, NULL);
399 add_referenced_var (tmp);
400 new_oprnd = make_ssa_name (tmp, NULL);
401 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
402 NULL_TREE);
403 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
404 VEC_safe_push (gimple, heap, *stmts, def_stmt);
405 *oprnd = new_oprnd;
408 *half_type = new_type;
409 return true;
413 /* Function vect_recog_widen_mult_pattern
415 Try to find the following pattern:
417 type a_t, b_t;
418 TYPE a_T, b_T, prod_T;
420 S1 a_t = ;
421 S2 b_t = ;
422 S3 a_T = (TYPE) a_t;
423 S4 b_T = (TYPE) b_t;
424 S5 prod_T = a_T * b_T;
426 where type 'TYPE' is at least double the size of type 'type'.
428 Also detect unsgigned cases:
430 unsigned type a_t, b_t;
431 unsigned TYPE u_prod_T;
432 TYPE a_T, b_T, prod_T;
434 S1 a_t = ;
435 S2 b_t = ;
436 S3 a_T = (TYPE) a_t;
437 S4 b_T = (TYPE) b_t;
438 S5 prod_T = a_T * b_T;
439 S6 u_prod_T = (unsigned TYPE) prod_T;
441 and multiplication by constants:
443 type a_t;
444 TYPE a_T, prod_T;
446 S1 a_t = ;
447 S3 a_T = (TYPE) a_t;
448 S5 prod_T = a_T * CONST;
450 A special case of multiplication by constants is when 'TYPE' is 4 times
451 bigger than 'type', but CONST fits an intermediate type 2 times smaller
452 than 'TYPE'. In that case we create an additional pattern stmt for S3
453 to create a variable of the intermediate type, and perform widen-mult
454 on the intermediate type as well:
456 type a_t;
457 interm_type a_it;
458 TYPE a_T, prod_T, prod_T';
460 S1 a_t = ;
461 S3 a_T = (TYPE) a_t;
462 '--> a_it = (interm_type) a_t;
463 S5 prod_T = a_T * CONST;
464 '--> prod_T' = a_it w* CONST;
466 Input/Output:
468 * STMTS: Contains a stmt from which the pattern search begins. In the
469 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
470 is detected. In case of unsigned widen-mult, the original stmt (S5) is
471 replaced with S6 in STMTS. In case of multiplication by a constant
472 of an intermediate type (the last case above), STMTS also contains S3
473 (inserted before S5).
475 Output:
477 * TYPE_IN: The type of the input arguments to the pattern.
479 * TYPE_OUT: The type of the output of this pattern.
481 * Return value: A new stmt that will be used to replace the sequence of
482 stmts that constitute the pattern. In this case it will be:
483 WIDEN_MULT <a_t, b_t>
486 static gimple
487 vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
488 tree *type_in, tree *type_out)
490 gimple last_stmt = VEC_pop (gimple, *stmts);
491 gimple def_stmt0, def_stmt1;
492 tree oprnd0, oprnd1;
493 tree type, half_type0, half_type1;
494 gimple pattern_stmt;
495 tree vectype, vectype_out = NULL_TREE;
496 tree dummy;
497 tree var;
498 enum tree_code dummy_code;
499 int dummy_int;
500 VEC (tree, heap) *dummy_vec;
501 bool op0_ok, op1_ok;
503 if (!is_gimple_assign (last_stmt))
504 return NULL;
506 type = gimple_expr_type (last_stmt);
508 /* Starting from LAST_STMT, follow the defs of its uses in search
509 of the above pattern. */
511 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
512 return NULL;
514 oprnd0 = gimple_assign_rhs1 (last_stmt);
515 oprnd1 = gimple_assign_rhs2 (last_stmt);
516 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
517 || !types_compatible_p (TREE_TYPE (oprnd1), type))
518 return NULL;
520 /* Check argument 0. */
521 op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false);
522 /* Check argument 1. */
523 op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
525 /* In case of multiplication by a constant one of the operands may not match
526 the pattern, but not both. */
527 if (!op0_ok && !op1_ok)
528 return NULL;
530 if (op0_ok && op1_ok)
532 oprnd0 = gimple_assign_rhs1 (def_stmt0);
533 oprnd1 = gimple_assign_rhs1 (def_stmt1);
535 else if (!op0_ok)
537 if (TREE_CODE (oprnd0) == INTEGER_CST
538 && TREE_CODE (half_type1) == INTEGER_TYPE
539 && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1,
540 stmts, type,
541 &half_type1, def_stmt1))
542 half_type0 = half_type1;
543 else
544 return NULL;
546 else if (!op1_ok)
548 if (TREE_CODE (oprnd1) == INTEGER_CST
549 && TREE_CODE (half_type0) == INTEGER_TYPE
550 && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0,
551 stmts, type,
552 &half_type0, def_stmt0))
553 half_type1 = half_type0;
554 else
555 return NULL;
558 /* Handle unsigned case. Look for
559 S6 u_prod_T = (unsigned TYPE) prod_T;
560 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
561 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
563 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
564 imm_use_iterator imm_iter;
565 use_operand_p use_p;
566 int nuses = 0;
567 gimple use_stmt = NULL;
568 tree use_type;
570 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
571 return NULL;
573 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
575 if (is_gimple_debug (USE_STMT (use_p)))
576 continue;
577 use_stmt = USE_STMT (use_p);
578 nuses++;
581 if (nuses != 1 || !is_gimple_assign (use_stmt)
582 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
583 return NULL;
585 use_lhs = gimple_assign_lhs (use_stmt);
586 use_type = TREE_TYPE (use_lhs);
587 if (!INTEGRAL_TYPE_P (use_type)
588 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
589 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
590 return NULL;
592 type = use_type;
593 last_stmt = use_stmt;
596 if (!types_compatible_p (half_type0, half_type1))
597 return NULL;
599 /* Pattern detected. */
600 if (vect_print_dump_info (REPORT_DETAILS))
601 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
603 /* Check target support */
604 vectype = get_vectype_for_scalar_type (half_type0);
605 vectype_out = get_vectype_for_scalar_type (type);
606 if (!vectype
607 || !vectype_out
608 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
609 vectype_out, vectype,
610 &dummy, &dummy, &dummy_code,
611 &dummy_code, &dummy_int, &dummy_vec))
612 return NULL;
614 *type_in = vectype;
615 *type_out = vectype_out;
617 /* Pattern supported. Create a stmt to be used to replace the pattern: */
618 var = vect_recog_temp_ssa_var (type, NULL);
619 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
620 oprnd1);
622 if (vect_print_dump_info (REPORT_DETAILS))
623 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
625 VEC_safe_push (gimple, heap, *stmts, last_stmt);
626 return pattern_stmt;
630 /* Function vect_recog_pow_pattern
632 Try to find the following pattern:
634 x = POW (y, N);
636 with POW being one of pow, powf, powi, powif and N being
637 either 2 or 0.5.
639 Input:
641 * LAST_STMT: A stmt from which the pattern search begins.
643 Output:
645 * TYPE_IN: The type of the input arguments to the pattern.
647 * TYPE_OUT: The type of the output of this pattern.
649 * Return value: A new stmt that will be used to replace the sequence of
650 stmts that constitute the pattern. In this case it will be:
651 x = x * x
653 x = sqrt (x)
656 static gimple
657 vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
658 tree *type_out)
660 gimple last_stmt = VEC_index (gimple, *stmts, 0);
661 tree fn, base, exp = NULL;
662 gimple stmt;
663 tree var;
665 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
666 return NULL;
668 fn = gimple_call_fndecl (last_stmt);
669 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
670 return NULL;
672 switch (DECL_FUNCTION_CODE (fn))
674 case BUILT_IN_POWIF:
675 case BUILT_IN_POWI:
676 case BUILT_IN_POWF:
677 case BUILT_IN_POW:
678 base = gimple_call_arg (last_stmt, 0);
679 exp = gimple_call_arg (last_stmt, 1);
680 if (TREE_CODE (exp) != REAL_CST
681 && TREE_CODE (exp) != INTEGER_CST)
682 return NULL;
683 break;
685 default:
686 return NULL;
689 /* We now have a pow or powi builtin function call with a constant
690 exponent. */
692 *type_out = NULL_TREE;
694 /* Catch squaring. */
695 if ((host_integerp (exp, 0)
696 && tree_low_cst (exp, 0) == 2)
697 || (TREE_CODE (exp) == REAL_CST
698 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
700 *type_in = TREE_TYPE (base);
702 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
703 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
704 return stmt;
707 /* Catch square root. */
708 if (TREE_CODE (exp) == REAL_CST
709 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
711 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
712 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
713 if (*type_in)
715 gimple stmt = gimple_build_call (newfn, 1, base);
716 if (vectorizable_function (stmt, *type_in, *type_in)
717 != NULL_TREE)
719 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
720 gimple_call_set_lhs (stmt, var);
721 return stmt;
726 return NULL;
730 /* Function vect_recog_widen_sum_pattern
732 Try to find the following pattern:
734 type x_t;
735 TYPE x_T, sum = init;
736 loop:
737 sum_0 = phi <init, sum_1>
738 S1 x_t = *p;
739 S2 x_T = (TYPE) x_t;
740 S3 sum_1 = x_T + sum_0;
742 where type 'TYPE' is at least double the size of type 'type', i.e - we're
743 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
744 a special case of a reduction computation.
746 Input:
748 * LAST_STMT: A stmt from which the pattern search begins. In the example,
749 when this function is called with S3, the pattern {S2,S3} will be detected.
751 Output:
753 * TYPE_IN: The type of the input arguments to the pattern.
755 * TYPE_OUT: The type of the output of this pattern.
757 * Return value: A new stmt that will be used to replace the sequence of
758 stmts that constitute the pattern. In this case it will be:
759 WIDEN_SUM <x_t, sum_0>
761 Note: The widening-sum idiom is a widening reduction pattern that is
762 vectorized without preserving all the intermediate results. It
763 produces only N/2 (widened) results (by summing up pairs of
764 intermediate results) rather than all N results. Therefore, we
765 cannot allow this pattern when we want to get all the results and in
766 the correct order (as is the case when this computation is in an
767 inner-loop nested in an outer-loop that us being vectorized). */
769 static gimple
770 vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
771 tree *type_out)
773 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
774 tree oprnd0, oprnd1;
775 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
776 tree type, half_type;
777 gimple pattern_stmt;
778 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
779 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
780 tree var;
782 if (!is_gimple_assign (last_stmt))
783 return NULL;
785 type = gimple_expr_type (last_stmt);
787 /* Look for the following pattern
788 DX = (TYPE) X;
789 sum_1 = DX + sum_0;
790 In which DX is at least double the size of X, and sum_1 has been
791 recognized as a reduction variable.
794 /* Starting from LAST_STMT, follow the defs of its uses in search
795 of the above pattern. */
797 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
798 return NULL;
800 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
801 return NULL;
803 oprnd0 = gimple_assign_rhs1 (last_stmt);
804 oprnd1 = gimple_assign_rhs2 (last_stmt);
805 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
806 || !types_compatible_p (TREE_TYPE (oprnd1), type))
807 return NULL;
809 /* So far so good. Since last_stmt was detected as a (summation) reduction,
810 we know that oprnd1 is the reduction variable (defined by a loop-header
811 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
812 Left to check that oprnd0 is defined by a cast from type 'type' to type
813 'TYPE'. */
815 if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
816 return NULL;
818 oprnd0 = gimple_assign_rhs1 (stmt);
819 *type_in = half_type;
820 *type_out = type;
822 /* Pattern detected. Create a stmt to be used to replace the pattern: */
823 var = vect_recog_temp_ssa_var (type, NULL);
824 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
825 oprnd0, oprnd1);
827 if (vect_print_dump_info (REPORT_DETAILS))
829 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
830 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
833 /* We don't allow changing the order of the computation in the inner-loop
834 when doing outer-loop vectorization. */
835 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
837 return pattern_stmt;
841 /* Return TRUE if the operation in STMT can be performed on a smaller type.
843 Input:
844 STMT - a statement to check.
845 DEF - we support operations with two operands, one of which is constant.
846 The other operand can be defined by a demotion operation, or by a
847 previous statement in a sequence of over-promoted operations. In the
848 later case DEF is used to replace that operand. (It is defined by a
849 pattern statement we created for the previous statement in the
850 sequence).
852 Input/output:
853 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
854 NULL, it's the type of DEF.
855 STMTS - additional pattern statements. If a pattern statement (type
856 conversion) is created in this function, its original statement is
857 added to STMTS.
859 Output:
860 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
861 operands to use in the new pattern statement for STMT (will be created
862 in vect_recog_over_widening_pattern ()).
863 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
864 statements for STMT: the first one is a type promotion and the second
865 one is the operation itself. We return the type promotion statement
866 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
867 the second pattern statement. */
869 static bool
870 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
871 tree *op0, tree *op1, gimple *new_def_stmt,
872 VEC (gimple, heap) **stmts)
874 enum tree_code code;
875 tree const_oprnd, oprnd;
876 tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
877 gimple def_stmt, new_stmt;
878 bool first = false;
879 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
880 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
882 *new_def_stmt = NULL;
884 if (!is_gimple_assign (stmt))
885 return false;
887 code = gimple_assign_rhs_code (stmt);
888 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
889 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
890 return false;
892 oprnd = gimple_assign_rhs1 (stmt);
893 const_oprnd = gimple_assign_rhs2 (stmt);
894 type = gimple_expr_type (stmt);
896 if (TREE_CODE (oprnd) != SSA_NAME
897 || TREE_CODE (const_oprnd) != INTEGER_CST)
898 return false;
900 /* If we are in the middle of a sequence, we use DEF from a previous
901 statement. Otherwise, OPRND has to be a result of type promotion. */
902 if (*new_type)
904 half_type = *new_type;
905 oprnd = def;
907 else
909 first = true;
910 if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
911 || !gimple_bb (def_stmt)
912 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
913 || !vinfo_for_stmt (def_stmt))
914 return false;
917 /* Can we perform the operation on a smaller type? */
918 switch (code)
920 case BIT_IOR_EXPR:
921 case BIT_XOR_EXPR:
922 case BIT_AND_EXPR:
923 if (!int_fits_type_p (const_oprnd, half_type))
925 /* HALF_TYPE is not enough. Try a bigger type if possible. */
926 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
927 return false;
929 interm_type = build_nonstandard_integer_type (
930 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
931 if (!int_fits_type_p (const_oprnd, interm_type))
932 return false;
935 break;
937 case LSHIFT_EXPR:
938 /* Try intermediate type - HALF_TYPE is not enough for sure. */
939 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
940 return false;
942 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
943 (e.g., if the original value was char, the shift amount is at most 8
944 if we want to use short). */
945 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
946 return false;
948 interm_type = build_nonstandard_integer_type (
949 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
951 if (!vect_supportable_shift (code, interm_type))
952 return false;
954 break;
956 case RSHIFT_EXPR:
957 if (vect_supportable_shift (code, half_type))
958 break;
960 /* Try intermediate type - HALF_TYPE is not supported. */
961 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
962 return false;
964 interm_type = build_nonstandard_integer_type (
965 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
967 if (!vect_supportable_shift (code, interm_type))
968 return false;
970 break;
972 default:
973 gcc_unreachable ();
976 /* There are four possible cases:
977 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
978 the first statement in the sequence)
979 a. The original, HALF_TYPE, is not enough - we replace the promotion
980 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
981 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
982 promotion.
983 2. OPRND is defined by a pattern statement we created.
984 a. Its type is not sufficient for the operation, we create a new stmt:
985 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
986 this statement in NEW_DEF_STMT, and it is later put in
987 STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
988 b. OPRND is good to use in the new statement. */
989 if (first)
991 if (interm_type)
993 /* Replace the original type conversion HALF_TYPE->TYPE with
994 HALF_TYPE->INTERM_TYPE. */
995 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
997 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
998 /* Check if the already created pattern stmt is what we need. */
999 if (!is_gimple_assign (new_stmt)
1000 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1001 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1002 return false;
1004 oprnd = gimple_assign_lhs (new_stmt);
1006 else
1008 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1009 oprnd = gimple_assign_rhs1 (def_stmt);
1010 tmp = create_tmp_reg (interm_type, NULL);
1011 add_referenced_var (tmp);
1012 new_oprnd = make_ssa_name (tmp, NULL);
1013 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1014 oprnd, NULL_TREE);
1015 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1016 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1017 oprnd = new_oprnd;
1020 else
1022 /* Retrieve the operand before the type promotion. */
1023 oprnd = gimple_assign_rhs1 (def_stmt);
1026 else
1028 if (interm_type)
1030 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1031 tmp = create_tmp_reg (interm_type, NULL);
1032 add_referenced_var (tmp);
1033 new_oprnd = make_ssa_name (tmp, NULL);
1034 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1035 oprnd, NULL_TREE);
1036 oprnd = new_oprnd;
1037 *new_def_stmt = new_stmt;
1040 /* Otherwise, OPRND is already set. */
1043 if (interm_type)
1044 *new_type = interm_type;
1045 else
1046 *new_type = half_type;
1048 *op0 = oprnd;
1049 *op1 = fold_convert (*new_type, const_oprnd);
1051 return true;
1055 /* Try to find a statement or a sequence of statements that can be performed
1056 on a smaller type:
1058 type x_t;
1059 TYPE x_T, res0_T, res1_T;
1060 loop:
1061 S1 x_t = *p;
1062 S2 x_T = (TYPE) x_t;
1063 S3 res0_T = op (x_T, C0);
1064 S4 res1_T = op (res0_T, C1);
1065 S5 ... = () res1_T; - type demotion
1067 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1068 constants.
1069 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1070 be 'type' or some intermediate type. For now, we expect S5 to be a type
1071 demotion operation. We also check that S3 and S4 have only one use.
1075 static gimple
1076 vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
1077 tree *type_in, tree *type_out)
1079 gimple stmt = VEC_pop (gimple, *stmts);
1080 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1081 tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
1082 imm_use_iterator imm_iter;
1083 use_operand_p use_p;
1084 int nuses = 0;
1085 tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
1086 bool first;
1087 struct loop *loop = (gimple_bb (stmt))->loop_father;
1089 first = true;
/* Walk forward along the single-use chain starting at STMT, building a
   pattern statement on the smaller type for each statement that fits
   (as determined by vect_operation_fits_smaller_type).  The walk stops
   at the first statement that does not fit.  */
1090 while (1)
1092 if (!vinfo_for_stmt (stmt)
1093 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1094 return NULL;
1096 new_def_stmt = NULL;
1097 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1098 &op0, &op1, &new_def_stmt,
1099 stmts))
1101 if (first)
1102 return NULL;
1103 else
1104 break;
1107 /* STMT can be performed on a smaller type. Check its uses. */
1108 lhs = gimple_assign_lhs (stmt);
1109 nuses = 0;
/* Count the non-debug uses of LHS; debug statements must not affect
   the decision, so they are skipped.  */
1110 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1112 if (is_gimple_debug (USE_STMT (use_p)))
1113 continue;
1114 use_stmt = USE_STMT (use_p);
1115 nuses++;
/* Only a single (non-debug) assignment use inside the loop is
   supported.  */
1118 if (nuses != 1 || !is_gimple_assign (use_stmt)
1119 || !gimple_bb (use_stmt)
1120 || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1121 return NULL;
1123 /* Create pattern statement for STMT. */
1124 vectype = get_vectype_for_scalar_type (new_type);
1125 if (!vectype)
1126 return NULL;
1128 /* We want to collect all the statements for which we create pattern
1129 statements, except for the case when the last statement in the
1130 sequence doesn't have a corresponding pattern statement. In such
1131 case we associate the last pattern statement with the last statement
1132 in the sequence. Therefore, we only add an original statement to
1133 the list if we know that it is not the last. */
1134 if (prev_stmt)
1135 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1137 var = vect_recog_temp_ssa_var (new_type, NULL);
1138 pattern_stmt
1139 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1140 op0, op1);
1141 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1142 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;
1144 if (vect_print_dump_info (REPORT_DETAILS))
1146 fprintf (vect_dump, "created pattern stmt: ");
1147 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
/* Advance to the single use and iterate.  */
1150 prev_stmt = stmt;
1151 stmt = use_stmt;
1153 first = false;
1156 /* We got a sequence. We expect it to end with a type demotion operation.
1157 Otherwise, we quit (for now). There are three possible cases: the
1158 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1159 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1160 NEW_TYPE differs (we create a new conversion statement). */
1161 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1163 use_lhs = gimple_assign_lhs (use_stmt);
1164 use_type = TREE_TYPE (use_lhs);
1165 /* Support only type promotion or signedness change. */
1166 if (!INTEGRAL_TYPE_P (use_type)
1167 || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1168 return NULL;
1170 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1171 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1173 /* Create NEW_TYPE->USE_TYPE conversion. */
1174 tmp = create_tmp_reg (use_type, NULL);
1175 add_referenced_var (tmp);
1176 new_oprnd = make_ssa_name (tmp, NULL);
1177 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1178 var, NULL_TREE);
1179 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1181 *type_in = get_vectype_for_scalar_type (new_type);
1182 *type_out = get_vectype_for_scalar_type (use_type);
1184 /* We created a pattern statement for the last statement in the
1185 sequence, so we don't need to associate it with the pattern
1186 statement created for PREV_STMT. Therefore, we add PREV_STMT
1187 to the list in order to mark it later in vect_pattern_recog_1. */
1188 if (prev_stmt)
1189 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1191 else
1193 if (prev_stmt)
1194 STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
1195 = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));
1197 *type_in = vectype;
1198 *type_out = NULL_TREE;
1201 VEC_safe_push (gimple, heap, *stmts, use_stmt);
1203 else
1204 /* TODO: support general case, create a conversion to the correct type. */
1205 return NULL;
1207 /* Pattern detected. */
1208 if (vect_print_dump_info (REPORT_DETAILS))
1210 fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1211 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1214 return pattern_stmt;
1218 /* Function vect_recog_mixed_size_cond_pattern
1220 Try to find the following pattern:
1222 type x_t, y_t;
1223 TYPE a_T, b_T, c_T;
1224 loop:
1225 S1 a_T = x_t CMP y_t ? b_T : c_T;
1227 where type 'TYPE' is an integral type which has different size
1228 from 'type'. b_T and c_T are constants and if 'TYPE' is wider
1229 than 'type', the constants need to fit into an integer type
1230 with the same width as 'type'.
1232 Input:
1234 * LAST_STMT: A stmt from which the pattern search begins.
1236 Output:
1238 * TYPE_IN: The type of the input arguments to the pattern.
1240 * TYPE_OUT: The type of the output of this pattern.
1242 * Return value: A new stmt that will be used to replace the pattern.
1243 Additionally a def_stmt is added.
1245 a_it = x_t CMP y_t ? b_it : c_it;
1246 a_T = (TYPE) a_it; */
1248 static gimple
1249 vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1250 tree *type_out)
1252 gimple last_stmt = VEC_index (gimple, *stmts, 0);
1253 tree cond_expr, then_clause, else_clause;
1254 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1255 tree type, vectype, comp_vectype, itype, vecitype;
1256 enum machine_mode cmpmode;
1257 gimple pattern_stmt, def_stmt;
1258 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
/* The pattern only applies to a COND_EXPR computed inside the loop.  */
1260 if (!is_gimple_assign (last_stmt)
1261 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1262 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1263 return NULL;
1265 cond_expr = gimple_assign_rhs1 (last_stmt);
1266 then_clause = gimple_assign_rhs2 (last_stmt);
1267 else_clause = gimple_assign_rhs3 (last_stmt);
/* Both selected values must be integer constants (see the function
   comment above).  */
1269 if (TREE_CODE (then_clause) != INTEGER_CST
1270 || TREE_CODE (else_clause) != INTEGER_CST)
1271 return NULL;
1273 if (!COMPARISON_CLASS_P (cond_expr))
1274 return NULL;
1276 comp_vectype
1277 = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
1278 if (comp_vectype == NULL_TREE)
1279 return NULL;
1281 type = gimple_expr_type (last_stmt);
1282 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
/* If the comparison operands and the result already have the same
   width, there is no size mismatch for this pattern to fix up.  */
1284 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1285 return NULL;
1287 vectype = get_vectype_for_scalar_type (type);
1288 if (vectype == NULL_TREE)
1289 return NULL;
/* If the target can expand the VEC_COND_EXPR with the original types
   directly, the pattern is not needed.  */
1291 if (expand_vec_cond_expr_p (vectype, comp_vectype))
1292 return NULL;
/* ITYPE: an integer type with the width of the comparison operands and
   the signedness of TYPE; the condition will be computed in ITYPE and
   then converted to TYPE.  */
1294 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1295 TYPE_UNSIGNED (type));
1296 if (itype == NULL_TREE
1297 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1298 return NULL;
1300 vecitype = get_vectype_for_scalar_type (itype);
1301 if (vecitype == NULL_TREE)
1302 return NULL;
/* The rewritten VEC_COND_EXPR on the intermediate type must be
   supported by the target, otherwise the transformation is useless.  */
1304 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
1305 return NULL;
/* When narrowing TYPE down to ITYPE, the constants must be
   representable in ITYPE.  */
1307 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1309 if (!int_fits_type_p (then_clause, itype)
1310 || !int_fits_type_p (else_clause, itype))
1311 return NULL;
/* Build: a_it = cond ? b_it : c_it;  a_T = (TYPE) a_it;  */
1314 def_stmt
1315 = gimple_build_assign_with_ops3 (COND_EXPR,
1316 vect_recog_temp_ssa_var (itype, NULL),
1317 unshare_expr (cond_expr),
1318 fold_convert (itype, then_clause),
1319 fold_convert (itype, else_clause));
1320 pattern_stmt
1321 = gimple_build_assign_with_ops (NOP_EXPR,
1322 vect_recog_temp_ssa_var (type, NULL),
1323 gimple_assign_lhs (def_stmt), NULL_TREE);
1325 STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
1326 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1327 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1328 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
1329 *type_in = vecitype;
1330 *type_out = vectype;
1332 return pattern_stmt;
1336 /* Mark statements that are involved in a pattern.  Create a stmt_vec_info
   for PATTERN_STMT, link PATTERN_STMT and ORIG_STMT to each other through
   their RELATED_STMT fields, give PATTERN_STMT the vector type
   PATTERN_VECTYPE, and perform the same bookkeeping for an additional
   pattern def stmt, if ORIG_STMT carries one.  */
1338 static inline void
1339 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
1340 tree pattern_vectype)
1342 stmt_vec_info pattern_stmt_info, def_stmt_info;
1343 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1344 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
1345 gimple def_stmt;
/* Give the pattern statement its own stmt_vec_info and place it in the
   same basic block as the original statement.  */
1347 set_vinfo_for_stmt (pattern_stmt,
1348 new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
1349 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1350 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
/* Cross-link the two statements and mark the original as replaced by
   the pattern.  */
1352 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
1353 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
1354 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1355 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
1356 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
1357 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
1358 STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
1359 = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
/* If the pattern came with an extra def stmt, set up its stmt_vec_info
   (creating one if it does not exist yet) the same way.  */
1360 if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
1362 def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
1363 def_stmt_info = vinfo_for_stmt (def_stmt);
1364 if (def_stmt_info == NULL)
1366 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1367 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1369 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
1370 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
1371 STMT_VINFO_DEF_TYPE (def_stmt_info)
1372 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
/* Keep an already-assigned vectype (e.g. set by the recognition
   function); otherwise fall back to the pattern's vectype.  */
1373 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
1374 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
1378 /* Function vect_pattern_recog_1
1380 Input:
1381 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
1382 computation pattern.
1383 STMT: A stmt from which the pattern search should start.
1385 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
1386 expression that computes the same functionality and can be used to
1387 replace the sequence of stmts that are involved in the pattern.
1389 Output:
1390 This function checks if the expression returned by PATTERN_RECOG_FUNC is
1391 supported in vector form by the target. We use 'TYPE_IN' to obtain the
1392 relevant vector type. If 'TYPE_IN' is already a vector type, then this
1393 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
1394 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
1395 to the available target pattern.
1397 This function also does some bookkeeping, as explained in the documentation
1398 for vect_pattern_recog. */
1400 static void
1401 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
1402 gimple_stmt_iterator si,
1403 VEC (gimple, heap) **stmts_to_replace)
1405 gimple stmt = gsi_stmt (si), pattern_stmt;
1406 stmt_vec_info stmt_info;
1407 loop_vec_info loop_vinfo;
1408 tree pattern_vectype;
1409 tree type_in, type_out;
1410 enum tree_code code;
1411 int i;
1412 gimple next;
/* Reset the scratch vector and seed it with the statement at SI before
   invoking the recognition function.  */
1414 VEC_truncate (gimple, *stmts_to_replace, 0);
1415 VEC_quick_push (gimple, *stmts_to_replace, stmt);
1416 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
1417 if (!pattern_stmt)
1418 return;
/* The recognition function may have pushed more statements; the last
   one in STMTS_TO_REPLACE is the statement the pattern replaces.  */
1420 stmt = VEC_last (gimple, *stmts_to_replace);
1421 stmt_info = vinfo_for_stmt (stmt);
1422 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1424 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
1426 /* No need to check target support (already checked by the pattern
1427 recognition function). */
1428 pattern_vectype = type_out ? type_out : type_in;
1430 else
1432 enum machine_mode vec_mode;
1433 enum insn_code icode;
1434 optab optab;
1436 /* Check target support */
1437 type_in = get_vectype_for_scalar_type (type_in);
1438 if (!type_in)
1439 return;
1440 if (type_out)
1441 type_out = get_vectype_for_scalar_type (type_out);
1442 else
1443 type_out = type_in;
1444 if (!type_out)
1445 return;
1446 pattern_vectype = type_out;
1448 if (is_gimple_assign (pattern_stmt))
1449 code = gimple_assign_rhs_code (pattern_stmt);
1450 else
1452 gcc_assert (is_gimple_call (pattern_stmt));
1453 code = CALL_EXPR;
/* Bail out unless the target has an instruction for this operation on
   TYPE_IN whose result mode matches TYPE_OUT.  */
1456 optab = optab_for_tree_code (code, type_in, optab_default);
1457 vec_mode = TYPE_MODE (type_in);
1458 if (!optab
1459 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
1460 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
1461 return;
1464 /* Found a vectorizable pattern. */
1465 if (vect_print_dump_info (REPORT_DETAILS))
1467 fprintf (vect_dump, "pattern recognized: ");
1468 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1471 /* Mark the stmts that are involved in the pattern. */
1472 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
1474 /* Patterns cannot be vectorized using SLP, because they change the order of
1475 computation. */
1476 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
1477 if (next == stmt)
1478 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
1480 /* It is possible that additional pattern stmts are created and inserted in
1481 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
1482 relevant statements. */
1483 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
1484 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
1485 i++)
1487 stmt_info = vinfo_for_stmt (stmt);
1488 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1489 if (vect_print_dump_info (REPORT_DETAILS))
1491 fprintf (vect_dump, "additional pattern stmt: ");
1492 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1495 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
1500 /* Function vect_pattern_recog
1502 Input:
1503 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
1504 computation idioms.
1506 Output - for each computation idiom that is detected we create a new stmt
1507 that provides the same functionality and that can be vectorized. We
1508 also record some information in the struct_stmt_info of the relevant
1509 stmts, as explained below:
1511 At the entry to this function we have the following stmts, with the
1512 following initial value in the STMT_VINFO fields:
1514 stmt in_pattern_p related_stmt vec_stmt
1515 S1: a_i = .... - - -
1516 S2: a_2 = ..use(a_i).. - - -
1517 S3: a_1 = ..use(a_2).. - - -
1518 S4: a_0 = ..use(a_1).. - - -
1519 S5: ... = ..use(a_0).. - - -
1521 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
1522 represented by a single stmt. We then:
1523 - create a new stmt S6 equivalent to the pattern (the stmt is not
1524 inserted into the code)
1525 - fill in the STMT_VINFO fields as follows:
1527 in_pattern_p related_stmt vec_stmt
1528 S1: a_i = .... - - -
1529 S2: a_2 = ..use(a_i).. - - -
1530 S3: a_1 = ..use(a_2).. - - -
1531 S4: a_0 = ..use(a_1).. true S6 -
1532 '---> S6: a_new = .... - S4 -
1533 S5: ... = ..use(a_0).. - - -
1535 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
1536 to each other through the RELATED_STMT field).
1538 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
1539 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
1540 remain irrelevant unless used by stmts other than S4.
1542 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
1543 (because they are marked as irrelevant). It will vectorize S6, and record
1544 a pointer to the new vector stmt VS6 from S6 (as usual).
1545 S4 will be skipped, and S5 will be vectorized as usual:
1547 in_pattern_p related_stmt vec_stmt
1548 S1: a_i = .... - - -
1549 S2: a_2 = ..use(a_i).. - - -
1550 S3: a_1 = ..use(a_2).. - - -
1551 > VS6: va_new = .... - - -
1552 S4: a_0 = ..use(a_1).. true S6 VS6
1553 '---> S6: a_new = .... - S4 VS6
1554 > VS5: ... = ..vuse(va_new).. - - -
1555 S5: ... = ..use(a_0).. - - -
1557 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
1558 elsewhere), and we'll end up with:
1560 VS6: va_new = ....
1561 VS5: ... = ..vuse(va_new)..
1563 In case of more than one pattern statements, e.g., widen-mult with
1564 intermediate type:
1566 S1 a_t = ;
1567 S2 a_T = (TYPE) a_t;
1568 '--> S3: a_it = (interm_type) a_t;
1569 S4 prod_T = a_T * CONST;
1570 '--> S5: prod_T' = a_it w* CONST;
1572 there may be other users of a_T outside the pattern. In that case S2 will
1573 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
1574 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
1575 be recorded in S3. */
1577 void
1578 vect_pattern_recog (loop_vec_info loop_vinfo)
1580 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1581 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1582 unsigned int nbbs = loop->num_nodes;
1583 gimple_stmt_iterator si;
1584 unsigned int i, j;
1585 vect_recog_func_ptr vect_recog_func;
1586 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
1588 if (vect_print_dump_info (REPORT_DETAILS))
1589 fprintf (vect_dump, "=== vect_pattern_recog ===");
1591 /* Scan through the loop stmts, applying the pattern recognition
1592 functions starting at each stmt visited: */
1593 for (i = 0; i < nbbs; i++)
1595 basic_block bb = bbs[i];
1596 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1598 /* Scan over all generic vect_recog_xxx_pattern functions. */
1599 for (j = 0; j < NUM_PATTERNS; j++)
1601 vect_recog_func = vect_vect_recog_func_ptrs[j];
1602 vect_pattern_recog_1 (vect_recog_func, si,
1603 &stmts_to_replace);
1608 VEC_free (gimple, heap, stmts_to_replace);