gcc/tree-vect-patterns.c

/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"
#include "diagnostic-core.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                              tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
                                               tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  vect_recog_widen_mult_pattern,
  vect_recog_widen_sum_pattern,
  vect_recog_dot_prod_pattern,
  vect_recog_pow_pattern,
  vect_recog_over_widening_pattern,
  vect_recog_widen_shift_pattern,
  vect_recog_vector_vector_shift_pattern,
  vect_recog_sdivmod_pow2_pattern,
  vect_recog_mixed_size_cond_pattern,
  vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Function widened_name_p

   Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type-promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
                bool check_sign)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (! *def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *half_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
    return false;

  if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
                           &dt))
    return false;

  return true;
}

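/* For illustration only (an editorial example, not part of the original
   sources): widened_name_p would report "short" as HALF_TYPE when called
   on the use of x_w below:

     short x;
     int x_w, sum;
     x_w = (int) x;        DEF_STMT: NAME = NOP (name0)
     sum = sum + x_w;      USE_STMT  */
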
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

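/* For illustration only (an editorial example, not part of the original
   sources): the classic source-level candidate for this pattern is

     short a[N], b[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i] * b[i];

   which maps to a target's dot-product instruction where one exists.  */
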
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               VEC (gimple, heap) **stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
              != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
      || !gimple_bb (def_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      || !vinfo_for_stmt (def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}

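/* For illustration only (an editorial example, not part of the original
   sources): with a char input and an int result, a shift amount that does
   not fit char but does fit short takes the intermediate-type path:

     char c;
     int i = (int) c << 12;

   12 exceeds the precision of char (8) but not of short (16), and int is
   4 times wider than char, so the shift is widened short -> int.  */
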
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/

static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;
  /* Check argument 1.  */
  op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);

  if (op1_ok)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      gimple use_stmt = NULL;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

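/* For illustration only (an editorial example, not part of the original
   sources): the canonical source-level form matched by widen-mult is

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   where both operands are promoted short -> int before the multiply, which
   is replaced by WIDEN_MULT <a[i], b[i]>.  */
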
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/

static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}

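/* For illustration only (an editorial example, not part of the original
   sources): both transformations performed by this pattern, in source form:

     double x, y, z;
     y = pow (x, 2.0);    ==>  y = x * x;
     z = pow (x, 0.5);    ==>  z = sqrt (x);

   The sqrt form is only used when the target has a vectorizable sqrt.  */
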
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

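/* For illustration only (an editorial example, not part of the original
   sources): a typical widen-sum candidate accumulates narrow elements
   into a wider accumulator:

     unsigned char a[N];
     unsigned int sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i];          becomes WIDEN_SUM <a[i], sum_0>.  */
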
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
          || !gimple_bb (def_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          || !vinfo_for_stmt (def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
           (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

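/* For illustration only (an editorial example, not part of the original
   sources): the RSHIFT_EXPR case above falls back to an intermediate type
   when the narrow type has no supported vector shift.  For instance, for
   "y = (int) x >> 2" with x of type char, if the target cannot shift
   vectors of chars, the operation is retried in short, which is still
   allowed because the precision of char * 4 <= the precision of int.  */
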
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check whether S3 and S4 can be done on a smaller type than 'TYPE'; it can
   either be 'type' or some intermediate type.  For now, we expect S5 to be a
   type demotion operation.  We also check that S3 and S4 have only one use.  */

static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nuses = 0;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  struct loop *loop = (gimple_bb (stmt))->loop_father;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      lhs = gimple_assign_lhs (stmt);
      nuses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || !gimple_bb (use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}

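/* For illustration only (an editorial example, not part of the original
   sources): over-widening shows up when C's integer promotions widen
   narrow arithmetic that is immediately truncated again:

     unsigned char in[N], out[N];
     for (i = 0; i < N; i++)
       out[i] = (in[i] << 1) | 1;

   The expression is computed in int by C rules, but can safely be done in
   a narrower type, so more elements fit in each vector.  */
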
/* Detect widening shift pattern:

     type a_t;
     TYPE a_T, res_T;

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
     S3 res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t;
     unsigned TYPE u_res_T;
     TYPE a_T, res_T;

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
     S3 res_T = a_T << CONST;
     S4 u_res_T = (unsigned TYPE) res_T;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

     type a_t;
     interm_type a_it;
     TYPE a_T, res_T, res_T';

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
         '--> a_it = (interm_type) a_t;
     S3 res_T = a_T << CONST;
         '--> res_T' = a_it <<* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.
   In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
   in STMTS.  When an intermediate type is used and a pattern statement is
   created for S2, we also put S2 here (before S3).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
     WIDEN_LSHIFT_EXPR <a_t, CONST>.  */

static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt, orig_stmt = NULL;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt = NULL;
  bool over_widen = false;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  orig_stmt = last_stmt;
  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    {
      /* This statement was also detected as over-widening operation (it can't
         be any other pattern, because only over-widening detects shifts).
         LAST_STMT is the final type demotion statement, but its related
         statement is shift.  We analyze the related statement to catch cases:

         orig code:
          type a_t;
          itype res;
          TYPE a_T, res_T;

          S1 a_T = (TYPE) a_t;
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;

          (size of type * 2 <= size of itype
           and size of itype * 2 <= size of TYPE)

         code after over-widening pattern detection:

          S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t;
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;  <--- LAST_STMT
               --> res = a_it << CONST;

         after widen_shift:

          S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t; - redundant
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;
               --> res = a_t w<< CONST;

         i.e., we replace the three statements with res = a_t w<< CONST.  */
      last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
      over_widen = true;
    }

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Handle unsigned case.  Look for
     S4  u_res_T = (unsigned TYPE) res_T;
     Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      tree use_type;

      if (over_widen)
        {
          /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
             We check here that TYPE is the correct type for the operation,
             i.e., it's the type of the original result.  */
          tree orig_type = gimple_expr_type (orig_stmt);
          if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
            return NULL;
        }
      else
        {
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;
              use_stmt = USE_STMT (use_p);
              nuses++;
            }

          if (nuses != 1 || !is_gimple_assign (use_stmt)
              || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
            return NULL;

          use_lhs = gimple_assign_lhs (use_stmt);
          use_type = TREE_TYPE (use_lhs);

          if (!INTEGRAL_TYPE_P (use_type)
              || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
            return NULL;

          type = use_type;
        }
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int,
                                          &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  if (use_stmt)
    last_stmt = use_stmt;
  else
    last_stmt = orig_stmt;

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

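/* For illustration only (an editorial example, not part of the original
   sources): a widening shift arises when a narrow value is promoted and
   then shifted, e.g. on targets with a vector "shift long" instruction:

     unsigned char a[N];
     unsigned short out[N];
     for (i = 0; i < N; i++)
       out[i] = (unsigned short) a[i] << 7;

   becoming WIDEN_LSHIFT_EXPR <a[i], 7> once narrowed to short.  */
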
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

     type a_t;
     TYPE b_T, res_T;

     S1 a_t = ;
     S2 b_T = ;
     S3 res_T = b_T op a_t;

   where type 'TYPE' is a type with different size than 'type',
   and op is <<, >> or rotate.

   Also detect cases:

     type a_t;
     TYPE b_T, c_T, res_T;

     S0 c_T = ;
     S1 a_t = (type) c_T;
     S2 b_T = ;
     S3 res_T = b_T op a_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
   with a shift/rotate which has same type on both operands, in the
   second case just b_T op c_T, in the first case with added cast
   from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the shift/rotate
   S3 stmt.  */

static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
                                        tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

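/* For illustration only (an editorial example, not part of the original
   sources): the two shift operands often have different types in C even
   though the hardware wants matching vector types:

     int b[N], res[N];
     char amount[N];
     for (i = 0; i < N; i++)
       res[i] = b[i] << amount[i];

   The pattern recasts amount[i] to the type of b[i] so that a
   vector-by-vector shift can be used.  */
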
/* Detect a signed division by power of two constant that wouldn't be
   otherwise vectorized:

     type a_t, b_t;

     S1 a_t = b_t / N;

   where type 'type' is a signed integral type and N is a constant positive
   power of two.

   Similarly handle signed modulo by power of two constant:

     S4 a_t = b_t % N;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the division stmt.  S1 is replaced by:
   S3  y_t = b_t < 0 ? N - 1 : 0;
   S2  x_t = b_t + y_t;
   S1' a_t = x_t >> log2 (N);

   S4 is replaced by (where *_T temporaries have unsigned type):
   S9  y_T = b_t < 0 ? -1U : 0U;
   S8  z_T = y_T >> (sizeof (type) * CHAR_BIT - log2 (N));
   S7  z_t = (type) z_T;
   S6  w_t = b_t + z_t;
   S5  x_t = w_t & (N - 1);
   S4' a_t = x_t - z_t;

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the division
   S1 or modulo S4 stmt.  */

static gimple
vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
                                 tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  optab optab;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_UNSIGNED (itype)
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
      || !integer_pow2p (oprnd1)
      || tree_int_cst_sgn (oprnd1) != 1)
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != NULL)
    {
      enum machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing
          || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
        return NULL;
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");

  cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
  if (rhs_code == TRUNC_DIV_EXPR)
    {
      tree var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
                                         fold_build2 (MINUS_EXPR, itype,
                                                      oprnd1,
                                                      build_int_cst (itype,
                                                                     1)),
                                         build_int_cst (itype, 0));
      new_pattern_def_seq (stmt_vinfo, def_stmt);
      var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
                                        gimple_assign_lhs (def_stmt));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
        = gimple_build_assign_with_ops (RSHIFT_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        var,
                                        build_int_cst (itype,
                                                       tree_log2 (oprnd1)));
    }
  else
    {
      tree signmask;
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
      if (compare_tree_int (oprnd1, 2) == 0)
        {
          signmask = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
                                             build_int_cst (itype, 1),
                                             build_int_cst (itype, 0));
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
      else
        {
          tree utype
            = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
          tree vecutype = get_vectype_for_scalar_type (utype);
          tree shift
            = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
                                    - tree_log2 (oprnd1));
          tree var = vect_recog_temp_ssa_var (utype, NULL);
          stmt_vec_info def_stmt_vinfo;

          def_stmt
            = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
                                             build_int_cst (utype, -1),
                                             build_int_cst (utype, 0));
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          var = vect_recog_temp_ssa_var (utype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
                                            gimple_assign_lhs (def_stmt),
                                            shift);
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          signmask = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
                                            NULL_TREE);
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
      def_stmt
        = gimple_build_assign_with_ops (PLUS_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        oprnd0, signmask);
      append_pattern_def_seq (stmt_vinfo, def_stmt);
      def_stmt
        = gimple_build_assign_with_ops (BIT_AND_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        gimple_assign_lhs (def_stmt),
                                        fold_build2 (MINUS_EXPR, itype,
                                                     oprnd1,
                                                     build_int_cst (itype,
                                                                    1)));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
        = gimple_build_assign_with_ops (MINUS_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        gimple_assign_lhs (def_stmt),
                                        signmask);
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}

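/* For illustration only (an editorial example, not part of the original
   sources): the division rewrite relies on the identity, for N a positive
   power of two,

     b / N == (b < 0 ? b + (N - 1) : b) >> log2 (N)

   in C's truncating signed division.  For instance, with N == 4:

     int b = -7;
     int q = (b + (b < 0 ? 3 : 0)) >> 2;   q == -1, matching -7 / 4.  */
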
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are constants and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
        Additionally a def_stmt is added.

        a_it = x_t CMP y_t ? b_it : c_it;
        a_T = (TYPE) a_it;  */

static gimple
vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                                    tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (TREE_CODE (then_clause) != INTEGER_CST
      || TREE_CODE (else_clause) != INTEGER_CST)
    return NULL;

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_vectype
    = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
                                          TYPE_UNSIGNED (type));
  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if (!int_fits_type_p (then_clause, itype)
          || !int_fits_type_p (else_clause, itype))
        return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops3 (COND_EXPR,
                                     vect_recog_temp_ssa_var (itype, NULL),
                                     unshare_expr (cond_expr),
                                     fold_convert (itype, then_clause),
                                     fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  return pattern_stmt;
}

/* Helper function of vect_recog_bool_pattern.  Called recursively; returns
   true if bool VAR can be optimized that way, i.e. if its whole definition
   chain consists of comparisons and bitwise operations that can be
   rewritten into integer COND_EXPRs.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
	return false;
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo))
	return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
	}
      return false;
    }
}
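
/* For illustration (hypothetical GIMPLE; the names are placeholders),
   check_bool_pattern returns true for a chain such as

     a_b = x < y;	comparison, OK if a VEC_COND_EXPR is supported
     b_b = m <= n;
     c_b = a_b & b_b;	bitwise op on two acceptable operands
     d_b = ~c_b;	BIT_NOT_EXPR of an acceptable operand

   whereas a bool loaded from memory or defined by a PHI node fails the
   is_gimple_assign / tcc_comparison checks above and makes the function
   return false.  */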
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (pattern_stmt),
				    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}
/* Helper function of vect_recog_bool_pattern.  Do the actual
   transformations, recursively.  VAR is an SSA_NAME that should be
   transformed from bool to a wider integer type, and OUT_TYPE is the
   desired final integer type of the whole pattern.  TRUEVAL should be
   NULL unless we are optimizing a BIT_AND_EXPR into a COND_EXPR with one
   integer from one of the operands in the then_clause.  STMTS is where
   statements with added pattern stmts should be pushed to.  */
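
/* For example (hypothetical input; the names are placeholders), in

     a_b = c1 < c2;	char comparison -> 8-bit integer COND_EXPR result
     b_b = i1 < i2;	int comparison -> 32-bit integer COND_EXPR result
     x_b = a_b & b_b;

   the two rewritten operands have different precisions, so the
   and_ior_xor code below casts (via adjust_bool_pattern_cast) the operand
   whose precision is further from OUT_TYPE's before emitting the
   BIT_AND_EXPR in a single common type.  */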
static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
		     VEC (gimple, heap) **stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, NULL_TREE);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, build_int_cst (itype, 1));
      break;
    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the result of one of the
	 COND_EXPRs in the other COND_EXPR and leaving the BIT_AND_EXPR
	 stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  d_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks,
	 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases
	 the comparison mask is computed and ANDed, in one case with an
	 all-ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      /* The COND_EXPR built for RHS2 used IRHS1 as its trueval
		 and thus replaces the whole BIT_AND_EXPR; transfer the
		 pattern stmt from DEF_STMT to STMT.  */
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
	      tstmt = VEC_pop (gimple, *stmts);
	      gcc_assert (tstmt == def_stmt);
	      VEC_quick_push (gimple, *stmts, stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs2;
	    }
	  else
	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* Otherwise, try the same with the comparison on RHS1.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
	      tstmt = VEC_pop (gimple, *stmts);
	      gcc_assert (tstmt == def_stmt);
	      VEC_quick_push (gimple, *stmts, stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs1;
	    }
	  else
	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (rhs_code,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	{
	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign_with_ops3 (COND_EXPR,
					 vect_recog_temp_ssa_var (itype, NULL),
					 cond_expr, trueval,
					 build_int_cst (itype, 0));
      break;
    }

  VEC_safe_push (gimple, heap, *stmts, stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}
/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming the size of TYPE is the same as the size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T & b_T;
	but the above is more efficient.  */
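
/* Besides the cast shown above, the second branch of this function also
   handles a bool stored directly to memory, e.g. (hypothetical example)

     _Bool b[N];
     for (i = 0; i < N; i++)
       b[i] = x[i] < y[i];

   Here LAST_STMT is the store of the bool SSA_NAME: the comparison is
   rewritten as var_T = x[i] < y[i] ? 1 : 0 in an integer type of the same
   size, and the store becomes a VIEW_CONVERT_EXPR store of var_T reusing
   the original data reference.  */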
static gimple
vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			 tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt
	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      else
	pattern_stmt
	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;
      if (!check_bool_pattern (var, loop_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple cast_stmt
	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      return pattern_stmt;
    }
  else
    return NULL;
}
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info)
	    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support has already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits the available target pattern.

   This function also does some bookkeeping, as explained in the
   documentation for vect_pattern_recog.  */
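
/* For instance, vect_recog_mixed_size_cond_pattern and
   vect_recog_bool_pattern above return vector types in TYPE_IN (they
   already verified support via expand_vec_cond_expr_p), so they skip the
   optab check below, while patterns that return scalar types rely on the
   optab_for_tree_code / optab_handler check here.  */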
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      VEC (gimple, heap) **stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  VEC_truncate (gimple, *stmts_to_replace, 0);
  VEC_quick_push (gimple, *stmts_to_replace, stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = VEC_last (gimple, *stmts_to_replace);
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "pattern recognized: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
    if (next == stmt)
      VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
	      && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "additional pattern stmt: ");
	  print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a loop_vec_info struct of a loop in which we want to look
	for computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the stmt_vec_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt				in_pattern_p	related_stmt	vec_stmt
	 S1: a_i = ....			-		-		-
	 S2: a_2 = ..use(a_i)..		-		-		-
	 S3: a_1 = ..use(a_2)..		-		-		-
	 S4: a_0 = ..use(a_1)..		-		-		-
	 S5: ... = ..use(a_0)..		-		-		-

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

					in_pattern_p	related_stmt	vec_stmt
	 S1: a_i = ....			-		-		-
	 S2: a_2 = ..use(a_i)..		-		-		-
	 S3: a_1 = ..use(a_2)..		-		-		-
	 S4: a_0 = ..use(a_1)..		true		S6		-
	   '---> S6: a_new = ....	-		S4		-
	 S5: ... = ..use(a_0)..		-		-		-

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

					in_pattern_p	related_stmt	vec_stmt
	 S1: a_i = ....			-		-		-
	 S2: a_2 = ..use(a_i)..		-		-		-
	 S3: a_1 = ..use(a_2)..		-		-		-
	 > VS6: va_new = ....		-		-		-
	 S4: a_0 = ..use(a_1)..		true		S6		VS6
	   '---> S6: a_new = ....	-		S4		VS6
	 > VS5: ... = ..vuse(va_new)..	-		-		-
	 S5: ... = ..use(a_0)..		-		-		-

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ...;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2
   will be marked as relevant (as well as S3), and both S2 and S3 will be
   analyzed and vectorized.  The vector stmt VS2 will be recorded in S2,
   and VS3 will be recorded in S3.  */
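
/* As a concrete (hypothetical) source-level instance of the widen-mult
   case above, given

     char a_t;
     int prod_T;
     ...
     a_t = x[i];
     a_T = (int) a_t;
     prod_T = a_T * 3;

   the widen-mult pattern may introduce a 16-bit intermediate type:

     a_it = (short) a_t;
     prod_T' = a_it w* 3;	widening multiply: short -> int

   instead of widening a_t all the way to int before an ordinary multiply,
   so the operands stay in a narrower vector element type.  */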
void
vect_pattern_recog (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_pattern_recog ===");

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    {
	      vect_recog_func = vect_vect_recog_func_ptrs[j];
	      vect_pattern_recog_1 (vect_recog_func, si,
				    &stmts_to_replace);
	    }
	}
    }

  VEC_free (gimple, heap, stmts_to_replace);
}