/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"
#include "diagnostic-core.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                              tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
                                               tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  vect_recog_widen_mult_pattern,
  vect_recog_widen_sum_pattern,
  vect_recog_dot_prod_pattern,
  vect_recog_pow_pattern,
  vect_recog_widen_shift_pattern,
  vect_recog_over_widening_pattern,
  vect_recog_vector_vector_shift_pattern,
  vect_recog_sdivmod_pow2_pattern,
  vect_recog_mixed_size_cond_pattern,
  vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
        return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
          || gimple_code (stmt2) == GIMPLE_PHI)
        return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of
   NAME.  If CHECK_SIGN is TRUE, check that either both types are signed
   or both are unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
                   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
                           &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and
         in the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
          && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

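/* Illustrative example, added for exposition and not part of the original
   sources: a source loop of the shape below is the classic trigger for this
   pattern, assuming the target has a vector dot-product operation for these
   widths (e.g. x86 pmaddwd).  The array names and bound are hypothetical:

     short x[1024], y[1024];

     int
     dot_prod (void)
     {
       int i, sum = 0;
       for (i = 0; i < 1024; i++)
         sum += x[i] * y[i];   // S3/S4 widen, S5 multiplies, S7 accumulates
       return sum;
     }
*/
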
/* Handle widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               VEC (gimple, heap) **stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
               != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}

/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>  */

static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

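/* Illustrative example, added for exposition and not part of the original
   sources: with narrow inputs and a result twice as wide, the loop below
   is the kind of source that becomes WIDEN_MULT <a_t, b_t>, assuming the
   target supports a widening multiply.  Names and bound are hypothetical:

     short a[1024], b[1024];
     int c[1024];

     void
     widen_mult (void)
     {
       int i;
       for (i = 0; i < 1024; i++)
         c[i] = a[i] * b[i];   // S3/S4 promote to int, S5 multiplies
     }
*/
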
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/

static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}

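/* Illustrative example, added for exposition and not part of the original
   sources: with a literal exponent of 2.0 the call is strength-reduced to
   a multiply, and with 0.5 to a sqrt call, provided the target can
   vectorize the sqrt builtin.  The function name is hypothetical:

     #include <math.h>

     float
     pow_examples (float x, float y)
     {
       float s = powf (x, 2.0f);   // becomes s = x * x
       float r = powf (y, 0.5f);   // becomes r = sqrtf (y)
       return s + r;
     }
*/
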
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and
         in the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

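/* Illustrative example, added for exposition and not part of the original
   sources: summing narrow elements into a wider accumulator, as below, is
   what becomes WIDEN_SUM <x_t, sum_0>.  Names and bound are hypothetical:

     short x[1024];

     int
     widen_sum (void)
     {
       int i, sum = 0;
       for (i = 0; i < 1024; i++)
         sum += x[i];   // S2 promotes x[i] to int, S3 accumulates
       return sum;
     }
*/
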
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In
         the latter case DEF is used to replace that operand.  (It is defined
         by a pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if
         not NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the
         second one is the operation itself.  We return the type promotion
         statement in NEW_DEF_STMT and further store it in
         STMT_VINFO_PATTERN_DEF_SEQ of the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
                              &promotion)
          || !promotion
          || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
        {
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
            return false;

          interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))
            return false;
        }

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
         (e.g., if the original value was char, the shift amount is at most 8
         if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
        return false;

      interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
        break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE,
        it's the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new
           stmt: a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.
           We store this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1
   are constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can
   either be 'type' or some intermediate type.  For now, we expect S5 to be
   a type demotion operation.  We also check that S3 and S4 have only one
   use.  */

static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in
             vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}

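/* Illustrative example, added for exposition and not part of the original
   sources: the shift and bitwise-or below are performed in int only because
   of C integer promotion, and the result is immediately demoted on the
   store, so the recognizer can narrow the whole sequence.  Names and bound
   are hypothetical:

     unsigned char in[1024], out[1024];

     void
     over_widen (void)
     {
       int i;
       for (i = 0; i < 1024; i++)
         out[i] = (in[i] << 1) | 1;   // computed in a narrow type instead
     }
*/
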
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced
    with S4 in STMTS.  When an intermediate type is used and a pattern
    statement is created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */

static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
          && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
        {
          last_stmt = use_stmt;
          type = use_type;
        }
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int,
                                          &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

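/* Illustrative example, added for exposition and not part of the original
   sources: shifting a promoted narrow value into a wider result, as below,
   maps to WIDEN_LSHIFT_EXPR on targets that provide a widening shift
   (e.g. ARM NEON's vshll).  Names and bound are hypothetical:

     short in[1024];
     int out[1024];

     void
     widen_shift (void)
     {
       int i;
       for (i = 0; i < 1024; i++)
         out[i] = in[i] << 7;   // S2 promotes in[i], S3 shifts by CONST
     }
*/
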
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

   type a_t;
   TYPE b_T, c_T, res_T;

   S0 c_T = ;
   S1 a_t = (type) c_T;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */

static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
                                        tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}

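/* Illustrative example, added for exposition and not part of the original
   sources: after earlier passes narrow the promoted operations back down,
   a loop like this can leave a shift whose amount has a different type
   than the shifted value, which is the mismatch this recognizer repairs.
   Names and bound are hypothetical:

     unsigned short dst[1024];
     unsigned char src[1024];

     void
     vector_shift (void)
     {
       int i;
       for (i = 0; i < 1024; i++)
         dst[i] = dst[i] >> src[i];   // shift amount narrower than value
     }
*/
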
/* Detect a signed division by power of two constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is a signed integral type and N is a constant positive
  power of two.

  Similarly handle signed modulo by power of two constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced by:
  S3  y_t = b_t < 0 ? N - 1 : 0;
  S2  x_t = b_t + y_t;
  S1' a_t = x_t >> log2 (N);

    S4 is replaced by (where *_T temporaries have unsigned type):
  S9  y_T = b_t < 0 ? -1U : 0U;
  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
  S7  z_t = (type) z_T;
  S6  w_t = b_t + z_t;
  S5  x_t = w_t & (N - 1);
  S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */

static gimple
vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
                                 tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  optab optab;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_UNSIGNED (itype)
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
      || !integer_pow2p (oprnd1)
      || tree_int_cst_sgn (oprnd1) != 1)
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != NULL)
    {
      enum machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing
          || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
        return NULL;
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");

  cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
  if (rhs_code == TRUNC_DIV_EXPR)
    {
      tree var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
                                         fold_build2 (MINUS_EXPR, itype,
                                                      oprnd1,
                                                      build_int_cst (itype,
                                                                     1)),
                                         build_int_cst (itype, 0));
      new_pattern_def_seq (stmt_vinfo, def_stmt);
      var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
                                        gimple_assign_lhs (def_stmt));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
        = gimple_build_assign_with_ops (RSHIFT_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        var,
                                        build_int_cst (itype,
                                                       tree_log2 (oprnd1)));
    }
  else
    {
      tree signmask;
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
      if (compare_tree_int (oprnd1, 2) == 0)
        {
          signmask = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
                                             build_int_cst (itype, 1),
                                             build_int_cst (itype, 0));
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
      else
        {
          tree utype
            = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
          tree vecutype = get_vectype_for_scalar_type (utype);
          tree shift
            = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
                                    - tree_log2 (oprnd1));
          tree var = vect_recog_temp_ssa_var (utype, NULL);
          stmt_vec_info def_stmt_vinfo;

          def_stmt
            = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
                                             build_int_cst (utype, -1),
                                             build_int_cst (utype, 0));
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          var = vect_recog_temp_ssa_var (utype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
                                            gimple_assign_lhs (def_stmt),
                                            shift);
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          signmask = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
                                            NULL_TREE);
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
      def_stmt
        = gimple_build_assign_with_ops (PLUS_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        oprnd0, signmask);
      append_pattern_def_seq (stmt_vinfo, def_stmt);
      def_stmt
        = gimple_build_assign_with_ops (BIT_AND_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        gimple_assign_lhs (def_stmt),
                                        fold_build2 (MINUS_EXPR, itype,
                                                     oprnd1,
                                                     build_int_cst (itype,
                                                                    1)));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
        = gimple_build_assign_with_ops (MINUS_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        gimple_assign_lhs (def_stmt),
                                        signmask);
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}

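/* Illustrative example, added for exposition and not part of the original
   sources: signed division and modulo by a power of two, as below, get
   expanded into the shift/mask sequences shown in the comment above.
   Names and bound are hypothetical:

     int a[1024], b[1024];

     void
     sdivmod_pow2 (void)
     {
       int i;
       for (i = 0; i < 1024; i++)
         a[i] = b[i] / 8 + b[i] % 8;   // N == 8, log2 (N) == 3
     }
*/
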
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
        Additionally a def_stmt is added.

        a_it = x_t CMP y_t ? b_it : c_it;
        a_T = (TYPE) a_it;  */

1821 static gimple
1822 vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1823 tree *type_out)
1825 gimple last_stmt = VEC_index (gimple, *stmts, 0);
1826 tree cond_expr, then_clause, else_clause;
1827 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1828 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
1829 enum machine_mode cmpmode;
1830 gimple pattern_stmt, def_stmt;
1831 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1832 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1833 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
1834 gimple def_stmt0 = NULL, def_stmt1 = NULL;
1835 bool promotion;
1836 tree comp_scalar_type;
1838 if (!is_gimple_assign (last_stmt)
1839 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1840 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1841 return NULL;
1843 cond_expr = gimple_assign_rhs1 (last_stmt);
1844 then_clause = gimple_assign_rhs2 (last_stmt);
1845 else_clause = gimple_assign_rhs3 (last_stmt);
1847 if (!COMPARISON_CLASS_P (cond_expr))
1848 return NULL;
1850 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
1851 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
1852 if (comp_vectype == NULL_TREE)
1853 return NULL;
1855 type = gimple_expr_type (last_stmt);
1856 if (types_compatible_p (type, comp_scalar_type)
1857 || ((TREE_CODE (then_clause) != INTEGER_CST
1858 || TREE_CODE (else_clause) != INTEGER_CST)
1859 && !INTEGRAL_TYPE_P (comp_scalar_type))
1860 || !INTEGRAL_TYPE_P (type))
1861 return NULL;
1863 if ((TREE_CODE (then_clause) != INTEGER_CST
1864 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
1865 &def_stmt0, &promotion))
1866 || (TREE_CODE (else_clause) != INTEGER_CST
1867 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
1868 &def_stmt1, &promotion)))
1869 return NULL;
1871 if (orig_type0 && orig_type1
1872 && !types_compatible_p (orig_type0, orig_type1))
1873 return NULL;
1875 if (orig_type0)
1877 if (!types_compatible_p (orig_type0, comp_scalar_type))
1878 return NULL;
1879 then_clause = gimple_assign_rhs1 (def_stmt0);
1880 itype = orig_type0;
1883 if (orig_type1)
1885 if (!types_compatible_p (orig_type1, comp_scalar_type))
1886 return NULL;
1887 else_clause = gimple_assign_rhs1 (def_stmt1);
1888 itype = orig_type1;
1891 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
1893 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1894 return NULL;
1896 vectype = get_vectype_for_scalar_type (type);
1897 if (vectype == NULL_TREE)
1898 return NULL;
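/* If the target can expand the COND_EXPR directly on TYPE's vector
   mode, there is nothing for this pattern to improve.  */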
1900 if (expand_vec_cond_expr_p (vectype, comp_vectype))
1901 return NULL;
1903 if (itype == NULL_TREE)
1904 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1905 TYPE_UNSIGNED (type));
1907 if (itype == NULL_TREE
1908 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1909 return NULL;
1911 vecitype = get_vectype_for_scalar_type (itype);
1912 if (vecitype == NULL_TREE)
1913 return NULL;
1915 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
1916 return NULL;
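/* If TYPE is wider than the comparison, the then/else constants must
   fit into the comparison-width ITYPE they are about to be converted
   to.  */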
1918 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1920 if ((TREE_CODE (then_clause) == INTEGER_CST
1921 && !int_fits_type_p (then_clause, itype))
1922 || (TREE_CODE (else_clause) == INTEGER_CST
1923 && !int_fits_type_p (else_clause, itype)))
1924 return NULL;
1927 def_stmt
1928 = gimple_build_assign_with_ops3 (COND_EXPR,
1929 vect_recog_temp_ssa_var (itype, NULL),
1930 unshare_expr (cond_expr),
1931 fold_convert (itype, then_clause),
1932 fold_convert (itype, else_clause));
1933 pattern_stmt
1934 = gimple_build_assign_with_ops (NOP_EXPR,
1935 vect_recog_temp_ssa_var (type, NULL),
1936 gimple_assign_lhs (def_stmt), NULL_TREE);
1938 new_pattern_def_seq (stmt_vinfo, def_stmt);
1939 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1940 set_vinfo_for_stmt (def_stmt, def_stmt_info);
1941 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
1942 *type_in = vecitype;
1943 *type_out = vectype;
1945 if (vect_print_dump_info (REPORT_DETAILS))
1946 fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: ");
1948 return pattern_stmt;
1952 /* Helper function of vect_recog_bool_pattern.  Called recursively; returns
1953 true if bool VAR can be rewritten as integer COND_EXPR stmts.  */
1955 static bool
1956 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1958 gimple def_stmt;
1959 enum vect_def_type dt;
1960 tree def, rhs1;
1961 enum tree_code rhs_code;
1963 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
1964 &dt))
1965 return false;
1967 if (dt != vect_internal_def)
1968 return false;
1970 if (!is_gimple_assign (def_stmt))
1971 return false;
1973 if (!has_single_use (def))
1974 return false;
1976 rhs1 = gimple_assign_rhs1 (def_stmt);
1977 rhs_code = gimple_assign_rhs_code (def_stmt);
1978 switch (rhs_code)
1980 case SSA_NAME:
1981 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
1983 CASE_CONVERT:
1984 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
1985 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
1986 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
1987 return false;
1988 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
1990 case BIT_NOT_EXPR:
1991 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
1993 case BIT_AND_EXPR:
1994 case BIT_IOR_EXPR:
1995 case BIT_XOR_EXPR:
1996 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
1997 return false;
1998 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
1999 bb_vinfo);
2001 default:
2002 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2004 tree vecitype, comp_vectype;
2006 /* If the comparison can throw, then is_gimple_condexpr will be
2007 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2008 if (stmt_could_throw_p (def_stmt))
2009 return false;
2011 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2012 if (comp_vectype == NULL_TREE)
2013 return false;
2015 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2017 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2018 tree itype
2019 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2020 vecitype = get_vectype_for_scalar_type (itype);
2021 if (vecitype == NULL_TREE)
2022 return false;
2024 else
2025 vecitype = comp_vectype;
2026 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2028 return false;
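/* For instance (a sketch), given

     bool a_b = x_1 < y_1;
     bool b_b = x_2 != y_2;
     bool c_b = a_b & b_b;

   check_bool_pattern (c_b, ...) succeeds if both comparisons are
   single-use internal defs and the target supports a VEC_COND_EXPR
   on each comparison's operand type.  */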
2033 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE of a previous
2034 stmt (SSA_NAME_DEF_STMT of VAR) by moving its COND_EXPR from RELATED_STMT
2035 to PATTERN_DEF_SEQ and installing the cast as the new RELATED_STMT.  */
2037 static tree
2038 adjust_bool_pattern_cast (tree type, tree var)
2040 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2041 gimple cast_stmt, pattern_stmt;
2043 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2044 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2045 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2046 cast_stmt
2047 = gimple_build_assign_with_ops (NOP_EXPR,
2048 vect_recog_temp_ssa_var (type, NULL),
2049 gimple_assign_lhs (pattern_stmt),
2050 NULL_TREE);
2051 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2052 return gimple_assign_lhs (cast_stmt);
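/* E.g. (a sketch): if VAR's def stmt currently has the related
   pattern stmt

     tmp_I = x_1 < y_1 ? 1 : 0;

   that COND_EXPR is moved into the def stmt's PATTERN_DEF_SEQ and the
   new related stmt becomes

     cast_J = (TYPE) tmp_I;

   whose lhs is returned.  */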
2056 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2057 recursively.  VAR is an SSA_NAME that should be transformed from bool
2058 to a wider integer type.  OUT_TYPE is the desired final integer type of
2059 the whole pattern.  TRUEVAL should be NULL unless we are optimizing a
2060 BIT_AND_EXPR into a COND_EXPR that uses an integer from one of the
2061 operands as the then_clause.  STMTS is the vector onto which stmts that
2062 had pattern stmts added are pushed.  */
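/* E.g. (a sketch): with TRUEVAL == NULL, a leaf comparison

     a_b = x_1 < y_1;

   becomes

     a_I = x_1 < y_1 ? 1 : 0;

   where a_I has an unsigned integer type of the comparison's width.  */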
2064 static tree
2065 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2066 VEC (gimple, heap) **stmts)
2068 gimple stmt = SSA_NAME_DEF_STMT (var);
2069 enum tree_code rhs_code, def_rhs_code;
2070 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2071 location_t loc;
2072 gimple pattern_stmt, def_stmt;
2074 rhs1 = gimple_assign_rhs1 (stmt);
2075 rhs2 = gimple_assign_rhs2 (stmt);
2076 rhs_code = gimple_assign_rhs_code (stmt);
2077 loc = gimple_location (stmt);
2078 switch (rhs_code)
2080 case SSA_NAME:
2081 CASE_CONVERT:
2082 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2083 itype = TREE_TYPE (irhs1);
2084 pattern_stmt
2085 = gimple_build_assign_with_ops (SSA_NAME,
2086 vect_recog_temp_ssa_var (itype, NULL),
2087 irhs1, NULL_TREE);
2088 break;
2090 case BIT_NOT_EXPR:
2091 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2092 itype = TREE_TYPE (irhs1);
2093 pattern_stmt
2094 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2095 vect_recog_temp_ssa_var (itype, NULL),
2096 irhs1, build_int_cst (itype, 1));
2097 break;
2099 case BIT_AND_EXPR:
2100 /* Try to optimize x = y & (a < b ? 1 : 0); into
2101 x = (a < b ? y : 0);
2103 E.g. for:
2104 bool a_b, b_b, c_b;
2105 TYPE d_T;
2107 S1 a_b = x1 CMP1 y1;
2108 S2 b_b = x2 CMP2 y2;
2109 S3 c_b = a_b & b_b;
2110 S4 d_T = (TYPE) c_b;
2112 we would normally emit:
2114 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2115 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2116 S3' c_T = a_T & b_T;
2117 S4' d_T = c_T;
2119 but we can save one stmt by using the
2120 result of one of the COND_EXPRs in the other COND_EXPR and leave
2121 the BIT_AND_EXPR stmt out:
2123 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2124 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2125 S4' d_T = c_T;
2127 At least when VEC_COND_EXPR is implemented using masks,
2128 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2129 computes the comparison mask and ANDs it, in one case with an
2130 all-ones vector, in the other case with a vector register.
2131 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
2132 often more expensive.  */
2133 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2134 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2135 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2137 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2138 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2139 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2140 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2142 gimple tstmt;
2143 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2144 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2145 tstmt = VEC_pop (gimple, *stmts);
2146 gcc_assert (tstmt == def_stmt);
2147 VEC_quick_push (gimple, *stmts, stmt);
2148 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2149 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2150 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2151 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2152 return irhs2;
2154 else
2155 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2156 goto and_ior_xor;
2158 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2159 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2160 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2162 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2163 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2164 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2165 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2167 gimple tstmt;
2168 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2169 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2170 tstmt = VEC_pop (gimple, *stmts);
2171 gcc_assert (tstmt == def_stmt);
2172 VEC_quick_push (gimple, *stmts, stmt);
2173 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2174 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2175 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2176 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2177 return irhs1;
2179 else
2180 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2181 goto and_ior_xor;
2183 /* FALLTHRU */
2184 case BIT_IOR_EXPR:
2185 case BIT_XOR_EXPR:
2186 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2187 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2188 and_ior_xor:
2189 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2190 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2192 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2193 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2194 int out_prec = TYPE_PRECISION (out_type);
2195 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2196 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2197 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2198 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2199 else
2201 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2202 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2205 itype = TREE_TYPE (irhs1);
2206 pattern_stmt
2207 = gimple_build_assign_with_ops (rhs_code,
2208 vect_recog_temp_ssa_var (itype, NULL),
2209 irhs1, irhs2);
2210 break;
2212 default:
2213 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2214 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2215 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2216 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2217 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2219 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2220 itype
2221 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2223 else
2224 itype = TREE_TYPE (rhs1);
2225 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2226 if (trueval == NULL_TREE)
2227 trueval = build_int_cst (itype, 1);
2228 else
2229 gcc_checking_assert (useless_type_conversion_p (itype,
2230 TREE_TYPE (trueval)));
2231 pattern_stmt
2232 = gimple_build_assign_with_ops3 (COND_EXPR,
2233 vect_recog_temp_ssa_var (itype, NULL),
2234 cond_expr, trueval,
2235 build_int_cst (itype, 0));
2236 break;
2239 VEC_safe_push (gimple, heap, *stmts, stmt);
2240 gimple_set_location (pattern_stmt, loc);
2241 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2242 return gimple_assign_lhs (pattern_stmt);
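/* E.g. (a sketch of the precision fix-up in and_ior_xor above): for

     a_b = c_1 < c_2;    (8-bit operands  -> 8-bit irhs1)
     b_b = i_1 < i_2;    (32-bit operands -> 32-bit irhs2)
     x_b = a_b | b_b;

   with a 32-bit OUT_TYPE, irhs1 is re-cast via
   adjust_bool_pattern_cast so that both BIT_IOR_EXPR operands get the
   precision closest to OUT_TYPE's.  */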
2246 /* Function vect_recog_bool_pattern
2248 Try to find a pattern like the following:
2250 bool a_b, b_b, c_b, d_b, e_b;
2251 TYPE f_T;
2252 loop:
2253 S1 a_b = x1 CMP1 y1;
2254 S2 b_b = x2 CMP2 y2;
2255 S3 c_b = a_b & b_b;
2256 S4 d_b = x3 CMP3 y3;
2257 S5 e_b = c_b | d_b;
2258 S6 f_T = (TYPE) e_b;
2260 where type 'TYPE' is an integral type.
2262 Input:
2264 * LAST_STMT: The stmt at the end of the pattern, from which the
2265 search begins, i.e. the cast of a bool to
2266 an integer type.
2268 Output:
2270 * TYPE_IN: The type of the input arguments to the pattern.
2272 * TYPE_OUT: The type of the output of this pattern.
2274 * Return value: A new stmt that will be used to replace the pattern.
2276 Assuming the size of TYPE is the same as the size of all comparisons
2277 (otherwise some casts would be added where needed), for the above
2278 sequence we create the following related pattern stmts:
2279 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2280 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2281 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2282 S5' e_T = c_T | d_T;
2283 S6' f_T = e_T;
2285 Instead of the above S3' we could emit:
2286 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2287 S3' c_T = a_T & b_T;
2288 but the above is more efficient. */
2290 static gimple
2291 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2292 tree *type_out)
2294 gimple last_stmt = VEC_pop (gimple, *stmts);
2295 enum tree_code rhs_code;
2296 tree var, lhs, rhs, vectype;
2297 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2298 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2299 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2300 gimple pattern_stmt;
2302 if (!is_gimple_assign (last_stmt))
2303 return NULL;
2305 var = gimple_assign_rhs1 (last_stmt);
2306 lhs = gimple_assign_lhs (last_stmt);
2308 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2309 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2310 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2311 return NULL;
2313 rhs_code = gimple_assign_rhs_code (last_stmt);
2314 if (CONVERT_EXPR_CODE_P (rhs_code))
2316 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2317 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2318 return NULL;
2319 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2320 if (vectype == NULL_TREE)
2321 return NULL;
2323 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2324 return NULL;
2326 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2327 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2328 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2329 pattern_stmt
2330 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2331 else
2332 pattern_stmt
2333 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2334 *type_out = vectype;
2335 *type_in = vectype;
2336 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2337 if (vect_print_dump_info (REPORT_DETAILS))
2338 fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
2340 return pattern_stmt;
2342 else if (rhs_code == SSA_NAME
2343 && STMT_VINFO_DATA_REF (stmt_vinfo))
2345 stmt_vec_info pattern_stmt_info;
2346 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2347 gcc_assert (vectype != NULL_TREE);
2348 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2349 return NULL;
2350 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2351 return NULL;
2353 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2354 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2355 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2357 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2358 gimple cast_stmt
2359 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2360 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2361 rhs = rhs2;
2363 pattern_stmt
2364 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2365 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2366 bb_vinfo);
2367 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2368 STMT_VINFO_DATA_REF (pattern_stmt_info)
2369 = STMT_VINFO_DATA_REF (stmt_vinfo);
2370 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2371 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2372 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2373 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2374 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2375 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2376 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2377 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2378 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2379 *type_out = vectype;
2380 *type_in = vectype;
2381 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2382 if (vect_print_dump_info (REPORT_DETAILS))
2383 fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
2384 return pattern_stmt;
2386 else
2387 return NULL;
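/* E.g. (a sketch of the data-reference case above): a bool store

     bool b[N];
     b[i] = x_1 < y_1;

   is replaced by a pattern stmt that stores the integer COND_EXPR
   result instead, roughly

     tmp_I = x_1 < y_1 ? 1 : 0;
     VIEW_CONVERT_EXPR<elem_type>(b[i]) = tmp_I;

   with the original data reference carried over to the pattern stmt.  */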
2391 /* Mark statements that are involved in a pattern. */
2393 static inline void
2394 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2395 tree pattern_vectype)
2397 stmt_vec_info pattern_stmt_info, def_stmt_info;
2398 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2399 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2400 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2401 gimple def_stmt;
2403 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2404 if (pattern_stmt_info == NULL)
2406 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2407 bb_vinfo);
2408 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2410 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2412 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2413 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2414 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2415 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2416 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2417 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2418 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2419 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2420 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2422 gimple_stmt_iterator si;
2423 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2424 !gsi_end_p (si); gsi_next (&si))
2426 def_stmt = gsi_stmt (si);
2427 def_stmt_info = vinfo_for_stmt (def_stmt);
2428 if (def_stmt_info == NULL)
2430 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2431 bb_vinfo);
2432 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2434 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2435 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2436 STMT_VINFO_DEF_TYPE (def_stmt_info)
2437 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2438 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2439 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2444 /* Function vect_pattern_recog_1
2446 Input:
2447 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2448 computation pattern.
2449 STMT: A stmt from which the pattern search should start.
2451 If PATTERN_RECOG_FUNC successfully detects the pattern, it creates an
2452 expression that computes the same functionality and can be used to
2453 replace the sequence of stmts that are involved in the pattern.
2455 Output:
2456 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2457 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2458 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2459 indicates that target support has already been checked by PATTERN_RECOG_FUNC.
2460 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2461 the available target pattern.
2463 This function also does some bookkeeping, as explained in the documentation
2464 for vect_pattern_recog.  */
2466 static void
2467 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2468 gimple_stmt_iterator si,
2469 VEC (gimple, heap) **stmts_to_replace)
2471 gimple stmt = gsi_stmt (si), pattern_stmt;
2472 stmt_vec_info stmt_info;
2473 loop_vec_info loop_vinfo;
2474 tree pattern_vectype;
2475 tree type_in, type_out;
2476 enum tree_code code;
2477 int i;
2478 gimple next;
2480 VEC_truncate (gimple, *stmts_to_replace, 0);
2481 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2482 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2483 if (!pattern_stmt)
2484 return;
2486 stmt = VEC_last (gimple, *stmts_to_replace);
2487 stmt_info = vinfo_for_stmt (stmt);
2488 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2490 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2492 /* No need to check target support (already checked by the pattern
2493 recognition function). */
2494 pattern_vectype = type_out ? type_out : type_in;
2496 else
2498 enum machine_mode vec_mode;
2499 enum insn_code icode;
2500 optab optab;
2502 /* Check target support */
2503 type_in = get_vectype_for_scalar_type (type_in);
2504 if (!type_in)
2505 return;
2506 if (type_out)
2507 type_out = get_vectype_for_scalar_type (type_out);
2508 else
2509 type_out = type_in;
2510 if (!type_out)
2511 return;
2512 pattern_vectype = type_out;
2514 if (is_gimple_assign (pattern_stmt))
2515 code = gimple_assign_rhs_code (pattern_stmt);
2516 else
2518 gcc_assert (is_gimple_call (pattern_stmt));
2519 code = CALL_EXPR;
2522 optab = optab_for_tree_code (code, type_in, optab_default);
2523 vec_mode = TYPE_MODE (type_in);
2524 if (!optab
2525 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2526 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2527 return;
2530 /* Found a vectorizable pattern. */
2531 if (vect_print_dump_info (REPORT_DETAILS))
2533 fprintf (vect_dump, "pattern recognized: ");
2534 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2537 /* Mark the stmts that are involved in the pattern. */
2538 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2540 /* Patterns cannot be vectorized using SLP, because they change the order of
2541 computation. */
2542 if (loop_vinfo)
2543 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2544 if (next == stmt)
2545 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2547 /* It is possible that additional pattern stmts are created and inserted in
2548 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2549 relevant statements. */
2550 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2551 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2552 i++)
2554 stmt_info = vinfo_for_stmt (stmt);
2555 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2556 if (vect_print_dump_info (REPORT_DETAILS))
2558 fprintf (vect_dump, "additional pattern stmt: ");
2559 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2562 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2567 /* Function vect_pattern_recog
2569 Input:
2570 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
2571 computation idioms.
2573 Output - for each computation idiom that is detected we create a new stmt
2574 that provides the same functionality and that can be vectorized. We
2575 also record some information in the stmt_vec_info of the relevant
2576 stmts, as explained below:
2578 At the entry to this function we have the following stmts, with the
2579 following initial value in the STMT_VINFO fields:
2581 stmt in_pattern_p related_stmt vec_stmt
2582 S1: a_i = .... - - -
2583 S2: a_2 = ..use(a_i).. - - -
2584 S3: a_1 = ..use(a_2).. - - -
2585 S4: a_0 = ..use(a_1).. - - -
2586 S5: ... = ..use(a_0).. - - -
2588 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2589 represented by a single stmt. We then:
2590 - create a new stmt S6 equivalent to the pattern (the stmt is not
2591 inserted into the code)
2592 - fill in the STMT_VINFO fields as follows:
2594 in_pattern_p related_stmt vec_stmt
2595 S1: a_i = .... - - -
2596 S2: a_2 = ..use(a_i).. - - -
2597 S3: a_1 = ..use(a_2).. - - -
2598 S4: a_0 = ..use(a_1).. true S6 -
2599 '---> S6: a_new = .... - S4 -
2600 S5: ... = ..use(a_0).. - - -
2602 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2603 to each other through the RELATED_STMT field).
2605 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2606 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2607 remain irrelevant unless used by stmts other than S4.
2609 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2610 (because they are marked as irrelevant). It will vectorize S6, and record
2611 a pointer to the new vector stmt VS6 from S6 (as usual).
2612 S4 will be skipped, and S5 will be vectorized as usual:
2614 in_pattern_p related_stmt vec_stmt
2615 S1: a_i = .... - - -
2616 S2: a_2 = ..use(a_i).. - - -
2617 S3: a_1 = ..use(a_2).. - - -
2618 > VS6: va_new = .... - - -
2619 S4: a_0 = ..use(a_1).. true S6 VS6
2620 '---> S6: a_new = .... - S4 VS6
2621 > VS5: ... = ..vuse(va_new).. - - -
2622 S5: ... = ..use(a_0).. - - -
2624 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2625 elsewhere), and we'll end up with:
2627 VS6: va_new = ....
2628 VS5: ... = ..vuse(va_new)..
2630 In the case of more than one pattern statement, e.g. widen-mult with an
2631 intermediate type:
2633 S1 a_t = ....;
2634 S2 a_T = (TYPE) a_t;
2635 '--> S3: a_it = (interm_type) a_t;
2636 S4 prod_T = a_T * CONST;
2637 '--> S5: prod_T' = a_it w* CONST;
2639 there may be other users of a_T outside the pattern. In that case S2 will
2640 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2641 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2642 be recorded in S3. */
2644 void
2645 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2647 struct loop *loop;
2648 basic_block *bbs, bb;
2649 unsigned int nbbs;
2650 gimple_stmt_iterator si;
2651 unsigned int i, j;
2652 vect_recog_func_ptr vect_recog_func;
2653 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2654 gimple stmt;
2656 if (vect_print_dump_info (REPORT_DETAILS))
2657 fprintf (vect_dump, "=== vect_pattern_recog ===");
2659 if (loop_vinfo)
2661 loop = LOOP_VINFO_LOOP (loop_vinfo);
2662 bbs = LOOP_VINFO_BBS (loop_vinfo);
2663 nbbs = loop->num_nodes;
2665 else
2667 bb = BB_VINFO_BB (bb_vinfo);
2668 nbbs = 1;
2669 bbs = XNEW (basic_block);
2670 bbs[0] = bb;
2673 /* Scan through the loop stmts, applying the pattern recognition
2674 functions starting at each stmt visited: */
2675 for (i = 0; i < nbbs; i++)
2677 basic_block bb = bbs[i];
2678 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2680 if (bb_vinfo && (stmt = gsi_stmt (si))
2681 && vinfo_for_stmt (stmt)
2682 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
2683 continue;
2685 /* Scan over all generic vect_recog_xxx_pattern functions. */
2686 for (j = 0; j < NUM_PATTERNS; j++)
2688 vect_recog_func = vect_vect_recog_func_ptrs[j];
2689 vect_pattern_recog_1 (vect_recog_func, si,
2690 &stmts_to_replace);
2695 VEC_free (gimple, heap, stmts_to_replace);
2696 if (bb_vinfo)
2697 free (bbs);
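/* Usage sketch (illustrative): the loop analysis path is expected to
   call

     vect_pattern_recog (loop_vinfo, NULL);

   while basic-block (SLP) vectorization calls

     vect_pattern_recog (NULL, bb_vinfo);

   after which detected idioms are linked through the RELATED_STMT
   fields as documented above.  */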