gcc/c-family/
[official-gcc.git] / gcc / tree-vect-patterns.c
blob0992fbc9c7358882ebc00086b47fb1637cbe71be
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "gimple.h"
31 #include "gimplify.h"
32 #include "gimple-iterator.h"
33 #include "gimple-ssa.h"
34 #include "tree-phinodes.h"
35 #include "ssa-iterators.h"
36 #include "tree-ssanames.h"
37 #include "cfgloop.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "params.h"
41 #include "tree-data-ref.h"
42 #include "tree-vectorizer.h"
43 #include "recog.h" /* FIXME: for insn_data */
44 #include "diagnostic-core.h"
45 #include "dumpfile.h"
47 /* Pattern recognition functions */
48 static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
49 tree *);
50 static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
51 tree *);
52 static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
53 tree *);
54 static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
55 static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
56 tree *);
57 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
58 tree *, tree *);
59 static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
60 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
61 tree *, tree *);
62 static gimple vect_recog_divmod_pattern (vec<gimple> *,
63 tree *, tree *);
64 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
65 tree *, tree *);
66 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
67 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
68 vect_recog_widen_mult_pattern,
69 vect_recog_widen_sum_pattern,
70 vect_recog_dot_prod_pattern,
71 vect_recog_pow_pattern,
72 vect_recog_widen_shift_pattern,
73 vect_recog_over_widening_pattern,
74 vect_recog_rotate_pattern,
75 vect_recog_vector_vector_shift_pattern,
76 vect_recog_divmod_pattern,
77 vect_recog_mixed_size_cond_pattern,
78 vect_recog_bool_pattern};
80 static inline void
81 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
83 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
84 stmt);
87 static inline void
88 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
90 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
91 append_pattern_def_seq (stmt_info, stmt);
94 /* Check whether STMT2 is in the same loop or basic block as STMT1.
95 Which of the two applies depends on whether we're currently doing
96 loop-based or basic-block-based vectorization, as determined by
97 the vinfo_for_stmt for STMT1 (which must be defined).
99 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
100 to be defined as well. */
102 static bool
103 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
105 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
106 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
107 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
109 if (!gimple_bb (stmt2))
110 return false;
112 if (loop_vinfo)
114 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
115 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
116 return false;
118 else
120 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
121 || gimple_code (stmt2) == GIMPLE_PHI)
122 return false;
125 gcc_assert (vinfo_for_stmt (stmt2));
126 return true;
129 /* If the LHS of DEF_STMT has a single use, and that statement is
130 in the same loop or basic block, return it. */
132 static gimple
133 vect_single_imm_use (gimple def_stmt)
135 tree lhs = gimple_assign_lhs (def_stmt);
136 use_operand_p use_p;
137 gimple use_stmt;
139 if (!single_imm_use (lhs, &use_p, &use_stmt))
140 return NULL;
142 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
143 return NULL;
145 return use_stmt;
148 /* Check whether NAME, an ssa-name used in USE_STMT,
149 is a result of a type promotion or demotion, such that:
150 DEF_STMT: NAME = NOP (name0)
151 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
152 If CHECK_SIGN is TRUE, check that either both types are signed or both are
153 unsigned. */
155 static bool
156 type_conversion_p (tree name, gimple use_stmt, bool check_sign,
157 tree *orig_type, gimple *def_stmt, bool *promotion)
159 tree dummy;
160 gimple dummy_gimple;
161 loop_vec_info loop_vinfo;
162 stmt_vec_info stmt_vinfo;
163 tree type = TREE_TYPE (name);
164 tree oprnd0;
165 enum vect_def_type dt;
166 tree def;
167 bb_vec_info bb_vinfo;
169 stmt_vinfo = vinfo_for_stmt (use_stmt);
170 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
171 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
172 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
173 &def, &dt))
174 return false;
176 if (dt != vect_internal_def
177 && dt != vect_external_def && dt != vect_constant_def)
178 return false;
180 if (!*def_stmt)
181 return false;
183 if (!is_gimple_assign (*def_stmt))
184 return false;
186 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
187 return false;
189 oprnd0 = gimple_assign_rhs1 (*def_stmt);
191 *orig_type = TREE_TYPE (oprnd0);
192 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
193 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
194 return false;
196 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
197 *promotion = true;
198 else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
199 *promotion = false;
200 else
201 return false;
203 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
204 bb_vinfo, &dummy_gimple, &dummy, &dt))
205 return false;
207 return true;
210 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
211 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
213 static tree
214 vect_recog_temp_ssa_var (tree type, gimple stmt)
216 return make_temp_ssa_name (type, stmt, "patt");
219 /* Function vect_recog_dot_prod_pattern
221 Try to find the following pattern:
223 type x_t, y_t;
224 TYPE1 prod;
225 TYPE2 sum = init;
226 loop:
227 sum_0 = phi <init, sum_1>
228 S1 x_t = ...
229 S2 y_t = ...
230 S3 x_T = (TYPE1) x_t;
231 S4 y_T = (TYPE1) y_t;
232 S5 prod = x_T * y_T;
233 [S6 prod = (TYPE2) prod; #optional]
234 S7 sum_1 = prod + sum_0;
236 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
237 same size of 'TYPE1' or bigger. This is a special case of a reduction
238 computation.
240 Input:
242 * STMTS: Contains a stmt from which the pattern search begins. In the
243 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
244 will be detected.
246 Output:
248 * TYPE_IN: The type of the input arguments to the pattern.
250 * TYPE_OUT: The type of the output of this pattern.
252 * Return value: A new stmt that will be used to replace the sequence of
253 stmts that constitute the pattern. In this case it will be:
254 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
256 Note: The dot-prod idiom is a widening reduction pattern that is
257 vectorized without preserving all the intermediate results. It
258 produces only N/2 (widened) results (by summing up pairs of
259 intermediate results) rather than all N results. Therefore, we
260 cannot allow this pattern when we want to get all the results and in
261 the correct order (as is the case when this computation is in an
262 inner-loop nested in an outer-loop that us being vectorized). */
264 static gimple
265 vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
266 tree *type_out)
268 gimple stmt, last_stmt = (*stmts)[0];
269 tree oprnd0, oprnd1;
270 tree oprnd00, oprnd01;
271 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
272 tree type, half_type;
273 gimple pattern_stmt;
274 tree prod_type;
275 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
276 struct loop *loop;
277 tree var;
278 bool promotion;
280 if (!loop_info)
281 return NULL;
283 loop = LOOP_VINFO_LOOP (loop_info);
285 if (!is_gimple_assign (last_stmt))
286 return NULL;
288 type = gimple_expr_type (last_stmt);
290 /* Look for the following pattern
291 DX = (TYPE1) X;
292 DY = (TYPE1) Y;
293 DPROD = DX * DY;
294 DDPROD = (TYPE2) DPROD;
295 sum_1 = DDPROD + sum_0;
296 In which
297 - DX is double the size of X
298 - DY is double the size of Y
299 - DX, DY, DPROD all have the same type
300 - sum is the same size of DPROD or bigger
301 - sum has been recognized as a reduction variable.
303 This is equivalent to:
304 DPROD = X w* Y; #widen mult
305 sum_1 = DPROD w+ sum_0; #widen summation
307 DPROD = X w* Y; #widen mult
308 sum_1 = DPROD + sum_0; #summation
311 /* Starting from LAST_STMT, follow the defs of its uses in search
312 of the above pattern. */
314 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
315 return NULL;
317 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
319 /* Has been detected as widening-summation? */
321 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
322 type = gimple_expr_type (stmt);
323 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
324 return NULL;
325 oprnd0 = gimple_assign_rhs1 (stmt);
326 oprnd1 = gimple_assign_rhs2 (stmt);
327 half_type = TREE_TYPE (oprnd0);
329 else
331 gimple def_stmt;
333 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
334 return NULL;
335 oprnd0 = gimple_assign_rhs1 (last_stmt);
336 oprnd1 = gimple_assign_rhs2 (last_stmt);
337 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
338 || !types_compatible_p (TREE_TYPE (oprnd1), type))
339 return NULL;
340 stmt = last_stmt;
342 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
343 &promotion)
344 && promotion)
346 stmt = def_stmt;
347 oprnd0 = gimple_assign_rhs1 (stmt);
349 else
350 half_type = type;
353 /* So far so good. Since last_stmt was detected as a (summation) reduction,
354 we know that oprnd1 is the reduction variable (defined by a loop-header
355 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
356 Left to check that oprnd0 is defined by a (widen_)mult_expr */
357 if (TREE_CODE (oprnd0) != SSA_NAME)
358 return NULL;
360 prod_type = half_type;
361 stmt = SSA_NAME_DEF_STMT (oprnd0);
363 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
364 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
365 return NULL;
367 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
368 inside the loop (in case we are analyzing an outer-loop). */
369 if (!is_gimple_assign (stmt))
370 return NULL;
371 stmt_vinfo = vinfo_for_stmt (stmt);
372 gcc_assert (stmt_vinfo);
373 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
374 return NULL;
375 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
376 return NULL;
377 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
379 /* Has been detected as a widening multiplication? */
381 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
382 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
383 return NULL;
384 stmt_vinfo = vinfo_for_stmt (stmt);
385 gcc_assert (stmt_vinfo);
386 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
387 oprnd00 = gimple_assign_rhs1 (stmt);
388 oprnd01 = gimple_assign_rhs2 (stmt);
390 else
392 tree half_type0, half_type1;
393 gimple def_stmt;
394 tree oprnd0, oprnd1;
396 oprnd0 = gimple_assign_rhs1 (stmt);
397 oprnd1 = gimple_assign_rhs2 (stmt);
398 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
399 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
400 return NULL;
401 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
402 &promotion)
403 || !promotion)
404 return NULL;
405 oprnd00 = gimple_assign_rhs1 (def_stmt);
406 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
407 &promotion)
408 || !promotion)
409 return NULL;
410 oprnd01 = gimple_assign_rhs1 (def_stmt);
411 if (!types_compatible_p (half_type0, half_type1))
412 return NULL;
413 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
414 return NULL;
417 half_type = TREE_TYPE (oprnd00);
418 *type_in = half_type;
419 *type_out = type;
421 /* Pattern detected. Create a stmt to be used to replace the pattern: */
422 var = vect_recog_temp_ssa_var (type, NULL);
423 pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
424 oprnd00, oprnd01, oprnd1);
426 if (dump_enabled_p ())
428 dump_printf_loc (MSG_NOTE, vect_location,
429 "vect_recog_dot_prod_pattern: detected: ");
430 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
431 dump_printf (MSG_NOTE, "\n");
434 /* We don't allow changing the order of the computation in the inner-loop
435 when doing outer-loop vectorization. */
436 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
438 return pattern_stmt;
442 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
443 and LSHIFT_EXPR.
445 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
446 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
448 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
449 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
450 that satisfies the above restrictions, we can perform a widening opeartion
451 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
452 with a_it = (interm_type) a_t; */
454 static bool
455 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
456 tree const_oprnd, tree *oprnd,
457 vec<gimple> *stmts, tree type,
458 tree *half_type, gimple def_stmt)
460 tree new_type, new_oprnd;
461 gimple new_stmt;
463 if (code != MULT_EXPR && code != LSHIFT_EXPR)
464 return false;
466 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
467 || (code == LSHIFT_EXPR
468 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
469 != 1))
470 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
472 /* CONST_OPRND is a constant of HALF_TYPE. */
473 *oprnd = gimple_assign_rhs1 (def_stmt);
474 return true;
477 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
478 return false;
480 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
481 return false;
483 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
484 a type 2 times bigger than HALF_TYPE. */
485 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
486 TYPE_UNSIGNED (type));
487 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
488 || (code == LSHIFT_EXPR
489 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
490 return false;
492 /* Use NEW_TYPE for widening operation. */
493 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
495 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
496 /* Check if the already created pattern stmt is what we need. */
497 if (!is_gimple_assign (new_stmt)
498 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
499 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
500 return false;
502 stmts->safe_push (def_stmt);
503 *oprnd = gimple_assign_lhs (new_stmt);
505 else
507 /* Create a_T = (NEW_TYPE) a_t; */
508 *oprnd = gimple_assign_rhs1 (def_stmt);
509 new_oprnd = make_ssa_name (new_type, NULL);
510 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
511 NULL_TREE);
512 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
513 stmts->safe_push (def_stmt);
514 *oprnd = new_oprnd;
517 *half_type = new_type;
518 return true;
522 /* Function vect_recog_widen_mult_pattern
524 Try to find the following pattern:
526 type a_t, b_t;
527 TYPE a_T, b_T, prod_T;
529 S1 a_t = ;
530 S2 b_t = ;
531 S3 a_T = (TYPE) a_t;
532 S4 b_T = (TYPE) b_t;
533 S5 prod_T = a_T * b_T;
535 where type 'TYPE' is at least double the size of type 'type'.
537 Also detect unsigned cases:
539 unsigned type a_t, b_t;
540 unsigned TYPE u_prod_T;
541 TYPE a_T, b_T, prod_T;
543 S1 a_t = ;
544 S2 b_t = ;
545 S3 a_T = (TYPE) a_t;
546 S4 b_T = (TYPE) b_t;
547 S5 prod_T = a_T * b_T;
548 S6 u_prod_T = (unsigned TYPE) prod_T;
550 and multiplication by constants:
552 type a_t;
553 TYPE a_T, prod_T;
555 S1 a_t = ;
556 S3 a_T = (TYPE) a_t;
557 S5 prod_T = a_T * CONST;
559 A special case of multiplication by constants is when 'TYPE' is 4 times
560 bigger than 'type', but CONST fits an intermediate type 2 times smaller
561 than 'TYPE'. In that case we create an additional pattern stmt for S3
562 to create a variable of the intermediate type, and perform widen-mult
563 on the intermediate type as well:
565 type a_t;
566 interm_type a_it;
567 TYPE a_T, prod_T, prod_T';
569 S1 a_t = ;
570 S3 a_T = (TYPE) a_t;
571 '--> a_it = (interm_type) a_t;
572 S5 prod_T = a_T * CONST;
573 '--> prod_T' = a_it w* CONST;
575 Input/Output:
577 * STMTS: Contains a stmt from which the pattern search begins. In the
578 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
579 is detected. In case of unsigned widen-mult, the original stmt (S5) is
580 replaced with S6 in STMTS. In case of multiplication by a constant
581 of an intermediate type (the last case above), STMTS also contains S3
582 (inserted before S5).
584 Output:
586 * TYPE_IN: The type of the input arguments to the pattern.
588 * TYPE_OUT: The type of the output of this pattern.
590 * Return value: A new stmt that will be used to replace the sequence of
591 stmts that constitute the pattern. In this case it will be:
592 WIDEN_MULT <a_t, b_t>
595 static gimple
596 vect_recog_widen_mult_pattern (vec<gimple> *stmts,
597 tree *type_in, tree *type_out)
599 gimple last_stmt = stmts->pop ();
600 gimple def_stmt0, def_stmt1;
601 tree oprnd0, oprnd1;
602 tree type, half_type0, half_type1;
603 gimple pattern_stmt;
604 tree vectype, vectype_out = NULL_TREE;
605 tree var;
606 enum tree_code dummy_code;
607 int dummy_int;
608 vec<tree> dummy_vec;
609 bool op1_ok;
610 bool promotion;
612 if (!is_gimple_assign (last_stmt))
613 return NULL;
615 type = gimple_expr_type (last_stmt);
617 /* Starting from LAST_STMT, follow the defs of its uses in search
618 of the above pattern. */
620 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
621 return NULL;
623 oprnd0 = gimple_assign_rhs1 (last_stmt);
624 oprnd1 = gimple_assign_rhs2 (last_stmt);
625 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
626 || !types_compatible_p (TREE_TYPE (oprnd1), type))
627 return NULL;
629 /* Check argument 0. */
630 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
631 &promotion)
632 || !promotion)
633 return NULL;
634 /* Check argument 1. */
635 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
636 &def_stmt1, &promotion);
638 if (op1_ok && promotion)
640 oprnd0 = gimple_assign_rhs1 (def_stmt0);
641 oprnd1 = gimple_assign_rhs1 (def_stmt1);
643 else
645 if (TREE_CODE (oprnd1) == INTEGER_CST
646 && TREE_CODE (half_type0) == INTEGER_TYPE
647 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
648 &oprnd0, stmts, type,
649 &half_type0, def_stmt0))
651 half_type1 = half_type0;
652 oprnd1 = fold_convert (half_type1, oprnd1);
654 else
655 return NULL;
658 /* Handle unsigned case. Look for
659 S6 u_prod_T = (unsigned TYPE) prod_T;
660 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
661 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
663 gimple use_stmt;
664 tree use_lhs;
665 tree use_type;
667 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
668 return NULL;
670 use_stmt = vect_single_imm_use (last_stmt);
671 if (!use_stmt || !is_gimple_assign (use_stmt)
672 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
673 return NULL;
675 use_lhs = gimple_assign_lhs (use_stmt);
676 use_type = TREE_TYPE (use_lhs);
677 if (!INTEGRAL_TYPE_P (use_type)
678 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
679 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
680 return NULL;
682 type = use_type;
683 last_stmt = use_stmt;
686 if (!types_compatible_p (half_type0, half_type1))
687 return NULL;
689 /* Pattern detected. */
690 if (dump_enabled_p ())
691 dump_printf_loc (MSG_NOTE, vect_location,
692 "vect_recog_widen_mult_pattern: detected:\n");
694 /* Check target support */
695 vectype = get_vectype_for_scalar_type (half_type0);
696 vectype_out = get_vectype_for_scalar_type (type);
697 if (!vectype
698 || !vectype_out
699 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
700 vectype_out, vectype,
701 &dummy_code, &dummy_code,
702 &dummy_int, &dummy_vec))
703 return NULL;
705 *type_in = vectype;
706 *type_out = vectype_out;
708 /* Pattern supported. Create a stmt to be used to replace the pattern: */
709 var = vect_recog_temp_ssa_var (type, NULL);
710 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
711 oprnd1);
713 if (dump_enabled_p ())
714 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
716 stmts->safe_push (last_stmt);
717 return pattern_stmt;
721 /* Function vect_recog_pow_pattern
723 Try to find the following pattern:
725 x = POW (y, N);
727 with POW being one of pow, powf, powi, powif and N being
728 either 2 or 0.5.
730 Input:
732 * LAST_STMT: A stmt from which the pattern search begins.
734 Output:
736 * TYPE_IN: The type of the input arguments to the pattern.
738 * TYPE_OUT: The type of the output of this pattern.
740 * Return value: A new stmt that will be used to replace the sequence of
741 stmts that constitute the pattern. In this case it will be:
742 x = x * x
744 x = sqrt (x)
747 static gimple
748 vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
749 tree *type_out)
751 gimple last_stmt = (*stmts)[0];
752 tree fn, base, exp = NULL;
753 gimple stmt;
754 tree var;
756 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
757 return NULL;
759 fn = gimple_call_fndecl (last_stmt);
760 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
761 return NULL;
763 switch (DECL_FUNCTION_CODE (fn))
765 case BUILT_IN_POWIF:
766 case BUILT_IN_POWI:
767 case BUILT_IN_POWF:
768 case BUILT_IN_POW:
769 base = gimple_call_arg (last_stmt, 0);
770 exp = gimple_call_arg (last_stmt, 1);
771 if (TREE_CODE (exp) != REAL_CST
772 && TREE_CODE (exp) != INTEGER_CST)
773 return NULL;
774 break;
776 default:
777 return NULL;
780 /* We now have a pow or powi builtin function call with a constant
781 exponent. */
783 *type_out = NULL_TREE;
785 /* Catch squaring. */
786 if ((tree_fits_shwi_p (exp)
787 && tree_to_shwi (exp) == 2)
788 || (TREE_CODE (exp) == REAL_CST
789 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
791 *type_in = TREE_TYPE (base);
793 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
794 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
795 return stmt;
798 /* Catch square root. */
799 if (TREE_CODE (exp) == REAL_CST
800 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
802 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
803 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
804 if (*type_in)
806 gimple stmt = gimple_build_call (newfn, 1, base);
807 if (vectorizable_function (stmt, *type_in, *type_in)
808 != NULL_TREE)
810 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
811 gimple_call_set_lhs (stmt, var);
812 return stmt;
817 return NULL;
821 /* Function vect_recog_widen_sum_pattern
823 Try to find the following pattern:
825 type x_t;
826 TYPE x_T, sum = init;
827 loop:
828 sum_0 = phi <init, sum_1>
829 S1 x_t = *p;
830 S2 x_T = (TYPE) x_t;
831 S3 sum_1 = x_T + sum_0;
833 where type 'TYPE' is at least double the size of type 'type', i.e - we're
834 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
835 a special case of a reduction computation.
837 Input:
839 * LAST_STMT: A stmt from which the pattern search begins. In the example,
840 when this function is called with S3, the pattern {S2,S3} will be detected.
842 Output:
844 * TYPE_IN: The type of the input arguments to the pattern.
846 * TYPE_OUT: The type of the output of this pattern.
848 * Return value: A new stmt that will be used to replace the sequence of
849 stmts that constitute the pattern. In this case it will be:
850 WIDEN_SUM <x_t, sum_0>
852 Note: The widening-sum idiom is a widening reduction pattern that is
853 vectorized without preserving all the intermediate results. It
854 produces only N/2 (widened) results (by summing up pairs of
855 intermediate results) rather than all N results. Therefore, we
856 cannot allow this pattern when we want to get all the results and in
857 the correct order (as is the case when this computation is in an
858 inner-loop nested in an outer-loop that us being vectorized). */
860 static gimple
861 vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
862 tree *type_out)
864 gimple stmt, last_stmt = (*stmts)[0];
865 tree oprnd0, oprnd1;
866 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
867 tree type, half_type;
868 gimple pattern_stmt;
869 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
870 struct loop *loop;
871 tree var;
872 bool promotion;
874 if (!loop_info)
875 return NULL;
877 loop = LOOP_VINFO_LOOP (loop_info);
879 if (!is_gimple_assign (last_stmt))
880 return NULL;
882 type = gimple_expr_type (last_stmt);
884 /* Look for the following pattern
885 DX = (TYPE) X;
886 sum_1 = DX + sum_0;
887 In which DX is at least double the size of X, and sum_1 has been
888 recognized as a reduction variable.
891 /* Starting from LAST_STMT, follow the defs of its uses in search
892 of the above pattern. */
894 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
895 return NULL;
897 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
898 return NULL;
900 oprnd0 = gimple_assign_rhs1 (last_stmt);
901 oprnd1 = gimple_assign_rhs2 (last_stmt);
902 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
903 || !types_compatible_p (TREE_TYPE (oprnd1), type))
904 return NULL;
906 /* So far so good. Since last_stmt was detected as a (summation) reduction,
907 we know that oprnd1 is the reduction variable (defined by a loop-header
908 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
909 Left to check that oprnd0 is defined by a cast from type 'type' to type
910 'TYPE'. */
912 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
913 &promotion)
914 || !promotion)
915 return NULL;
917 oprnd0 = gimple_assign_rhs1 (stmt);
918 *type_in = half_type;
919 *type_out = type;
921 /* Pattern detected. Create a stmt to be used to replace the pattern: */
922 var = vect_recog_temp_ssa_var (type, NULL);
923 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
924 oprnd0, oprnd1);
926 if (dump_enabled_p ())
928 dump_printf_loc (MSG_NOTE, vect_location,
929 "vect_recog_widen_sum_pattern: detected: ");
930 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
931 dump_printf (MSG_NOTE, "\n");
934 /* We don't allow changing the order of the computation in the inner-loop
935 when doing outer-loop vectorization. */
936 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
938 return pattern_stmt;
942 /* Return TRUE if the operation in STMT can be performed on a smaller type.
944 Input:
945 STMT - a statement to check.
946 DEF - we support operations with two operands, one of which is constant.
947 The other operand can be defined by a demotion operation, or by a
948 previous statement in a sequence of over-promoted operations. In the
949 later case DEF is used to replace that operand. (It is defined by a
950 pattern statement we created for the previous statement in the
951 sequence).
953 Input/output:
954 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
955 NULL, it's the type of DEF.
956 STMTS - additional pattern statements. If a pattern statement (type
957 conversion) is created in this function, its original statement is
958 added to STMTS.
960 Output:
961 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
962 operands to use in the new pattern statement for STMT (will be created
963 in vect_recog_over_widening_pattern ()).
964 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
965 statements for STMT: the first one is a type promotion and the second
966 one is the operation itself. We return the type promotion statement
967 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
968 the second pattern statement. */
970 static bool
971 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
972 tree *op0, tree *op1, gimple *new_def_stmt,
973 vec<gimple> *stmts)
975 enum tree_code code;
976 tree const_oprnd, oprnd;
977 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
978 gimple def_stmt, new_stmt;
979 bool first = false;
980 bool promotion;
982 *op0 = NULL_TREE;
983 *op1 = NULL_TREE;
984 *new_def_stmt = NULL;
986 if (!is_gimple_assign (stmt))
987 return false;
989 code = gimple_assign_rhs_code (stmt);
990 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
991 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
992 return false;
994 oprnd = gimple_assign_rhs1 (stmt);
995 const_oprnd = gimple_assign_rhs2 (stmt);
996 type = gimple_expr_type (stmt);
998 if (TREE_CODE (oprnd) != SSA_NAME
999 || TREE_CODE (const_oprnd) != INTEGER_CST)
1000 return false;
1002 /* If oprnd has other uses besides that in stmt we cannot mark it
1003 as being part of a pattern only. */
1004 if (!has_single_use (oprnd))
1005 return false;
1007 /* If we are in the middle of a sequence, we use DEF from a previous
1008 statement. Otherwise, OPRND has to be a result of type promotion. */
1009 if (*new_type)
1011 half_type = *new_type;
1012 oprnd = def;
1014 else
1016 first = true;
1017 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1018 &promotion)
1019 || !promotion
1020 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1021 return false;
1024 /* Can we perform the operation on a smaller type? */
1025 switch (code)
1027 case BIT_IOR_EXPR:
1028 case BIT_XOR_EXPR:
1029 case BIT_AND_EXPR:
1030 if (!int_fits_type_p (const_oprnd, half_type))
1032 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1033 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1034 return false;
1036 interm_type = build_nonstandard_integer_type (
1037 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1038 if (!int_fits_type_p (const_oprnd, interm_type))
1039 return false;
1042 break;
1044 case LSHIFT_EXPR:
1045 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1046 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1047 return false;
1049 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1050 (e.g., if the original value was char, the shift amount is at most 8
1051 if we want to use short). */
1052 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1053 return false;
1055 interm_type = build_nonstandard_integer_type (
1056 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1058 if (!vect_supportable_shift (code, interm_type))
1059 return false;
1061 break;
1063 case RSHIFT_EXPR:
1064 if (vect_supportable_shift (code, half_type))
1065 break;
1067 /* Try intermediate type - HALF_TYPE is not supported. */
1068 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1069 return false;
1071 interm_type = build_nonstandard_integer_type (
1072 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1074 if (!vect_supportable_shift (code, interm_type))
1075 return false;
1077 break;
1079 default:
1080 gcc_unreachable ();
1083 /* There are four possible cases:
1084 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1085 the first statement in the sequence)
1086 a. The original, HALF_TYPE, is not enough - we replace the promotion
1087 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1088 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1089 promotion.
1090 2. OPRND is defined by a pattern statement we created.
1091 a. Its type is not sufficient for the operation, we create a new stmt:
1092 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1093 this statement in NEW_DEF_STMT, and it is later put in
1094 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1095 b. OPRND is good to use in the new statement. */
1096 if (first)
1098 if (interm_type)
1100 /* Replace the original type conversion HALF_TYPE->TYPE with
1101 HALF_TYPE->INTERM_TYPE. */
1102 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1104 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1105 /* Check if the already created pattern stmt is what we need. */
1106 if (!is_gimple_assign (new_stmt)
1107 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1108 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1109 return false;
1111 stmts->safe_push (def_stmt);
1112 oprnd = gimple_assign_lhs (new_stmt);
1114 else
1116 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1117 oprnd = gimple_assign_rhs1 (def_stmt);
1118 new_oprnd = make_ssa_name (interm_type, NULL);
1119 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1120 oprnd, NULL_TREE);
1121 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1122 stmts->safe_push (def_stmt);
1123 oprnd = new_oprnd;
1126 else
1128 /* Retrieve the operand before the type promotion. */
1129 oprnd = gimple_assign_rhs1 (def_stmt);
1132 else
1134 if (interm_type)
1136 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1137 new_oprnd = make_ssa_name (interm_type, NULL);
1138 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1139 oprnd, NULL_TREE);
1140 oprnd = new_oprnd;
1141 *new_def_stmt = new_stmt;
1144 /* Otherwise, OPRND is already set. */
1147 if (interm_type)
1148 *new_type = interm_type;
1149 else
1150 *new_type = half_type;
1152 *op0 = oprnd;
1153 *op1 = fold_convert (*new_type, const_oprnd);
1155 return true;
1159 /* Try to find a statement or a sequence of statements that can be performed
1160 on a smaller type:
1162 type x_t;
1163 TYPE x_T, res0_T, res1_T;
1164 loop:
1165 S1 x_t = *p;
1166 S2 x_T = (TYPE) x_t;
1167 S3 res0_T = op (x_T, C0);
1168 S4 res1_T = op (res0_T, C1);
1169 S5 ... = () res1_T; - type demotion
1171 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1172 constants.
1173 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1174 be 'type' or some intermediate type. For now, we expect S5 to be a type
1175 demotion operation. We also check that S3 and S4 have only one use. */
1177 static gimple
1178 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1179 tree *type_in, tree *type_out)
1181 gimple stmt = stmts->pop ();
1182 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1183 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1184 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1185 bool first;
1186 tree type = NULL;
1188 first = true;
1189 while (1)
1191 if (!vinfo_for_stmt (stmt)
1192 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1193 return NULL;
1195 new_def_stmt = NULL;
1196 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1197 &op0, &op1, &new_def_stmt,
1198 stmts))
1200 if (first)
1201 return NULL;
1202 else
1203 break;
1206 /* STMT can be performed on a smaller type. Check its uses. */
1207 use_stmt = vect_single_imm_use (stmt);
1208 if (!use_stmt || !is_gimple_assign (use_stmt))
1209 return NULL;
1211 /* Create pattern statement for STMT. */
1212 vectype = get_vectype_for_scalar_type (new_type);
1213 if (!vectype)
1214 return NULL;
1216 /* We want to collect all the statements for which we create pattern
1217 statetments, except for the case when the last statement in the
1218 sequence doesn't have a corresponding pattern statement. In such
1219 case we associate the last pattern statement with the last statement
1220 in the sequence. Therefore, we only add the original statement to
1221 the list if we know that it is not the last. */
1222 if (prev_stmt)
1223 stmts->safe_push (prev_stmt);
1225 var = vect_recog_temp_ssa_var (new_type, NULL);
1226 pattern_stmt
1227 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1228 op0, op1);
1229 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1230 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1232 if (dump_enabled_p ())
1234 dump_printf_loc (MSG_NOTE, vect_location,
1235 "created pattern stmt: ");
1236 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1237 dump_printf (MSG_NOTE, "\n");
1240 type = gimple_expr_type (stmt);
1241 prev_stmt = stmt;
1242 stmt = use_stmt;
1244 first = false;
1247 /* We got a sequence. We expect it to end with a type demotion operation.
1248 Otherwise, we quit (for now). There are three possible cases: the
1249 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1250 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1251 NEW_TYPE differs (we create a new conversion statement). */
1252 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1254 use_lhs = gimple_assign_lhs (use_stmt);
1255 use_type = TREE_TYPE (use_lhs);
1256 /* Support only type demotion or signedess change. */
1257 if (!INTEGRAL_TYPE_P (use_type)
1258 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1259 return NULL;
1261 /* Check that NEW_TYPE is not bigger than the conversion result. */
1262 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1263 return NULL;
1265 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1266 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1268 /* Create NEW_TYPE->USE_TYPE conversion. */
1269 new_oprnd = make_ssa_name (use_type, NULL);
1270 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1271 var, NULL_TREE);
1272 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1274 *type_in = get_vectype_for_scalar_type (new_type);
1275 *type_out = get_vectype_for_scalar_type (use_type);
1277 /* We created a pattern statement for the last statement in the
1278 sequence, so we don't need to associate it with the pattern
1279 statement created for PREV_STMT. Therefore, we add PREV_STMT
1280 to the list in order to mark it later in vect_pattern_recog_1. */
1281 if (prev_stmt)
1282 stmts->safe_push (prev_stmt);
1284 else
1286 if (prev_stmt)
1287 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1288 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1290 *type_in = vectype;
1291 *type_out = NULL_TREE;
1294 stmts->safe_push (use_stmt);
1296 else
1297 /* TODO: support general case, create a conversion to the correct type. */
1298 return NULL;
1300 /* Pattern detected. */
1301 if (dump_enabled_p ())
1303 dump_printf_loc (MSG_NOTE, vect_location,
1304 "vect_recog_over_widening_pattern: detected: ");
1305 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1306 dump_printf (MSG_NOTE, "\n");
1309 return pattern_stmt;
1312 /* Detect widening shift pattern:
1314 type a_t;
1315 TYPE a_T, res_T;
1317 S1 a_t = ;
1318 S2 a_T = (TYPE) a_t;
1319 S3 res_T = a_T << CONST;
1321 where type 'TYPE' is at least double the size of type 'type'.
1323 Also detect cases where the shift result is immediately converted
1324 to another type 'result_type' that is no larger in size than 'TYPE'.
1325 In those cases we perform a widen-shift that directly results in
1326 'result_type', to avoid a possible over-widening situation:
1328 type a_t;
1329 TYPE a_T, res_T;
1330 result_type res_result;
1332 S1 a_t = ;
1333 S2 a_T = (TYPE) a_t;
1334 S3 res_T = a_T << CONST;
1335 S4 res_result = (result_type) res_T;
1336 '--> res_result' = a_t w<< CONST;
1338 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1339 create an additional pattern stmt for S2 to create a variable of an
1340 intermediate type, and perform widen-shift on the intermediate type:
1342 type a_t;
1343 interm_type a_it;
1344 TYPE a_T, res_T, res_T';
1346 S1 a_t = ;
1347 S2 a_T = (TYPE) a_t;
1348 '--> a_it = (interm_type) a_t;
1349 S3 res_T = a_T << CONST;
1350 '--> res_T' = a_it <<* CONST;
1352 Input/Output:
1354 * STMTS: Contains a stmt from which the pattern search begins.
1355 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1356 in STMTS. When an intermediate type is used and a pattern statement is
1357 created for S2, we also put S2 here (before S3).
1359 Output:
1361 * TYPE_IN: The type of the input arguments to the pattern.
1363 * TYPE_OUT: The type of the output of this pattern.
1365 * Return value: A new stmt that will be used to replace the sequence of
1366 stmts that constitute the pattern. In this case it will be:
1367 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1369 static gimple
1370 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1371 tree *type_in, tree *type_out)
1373 gimple last_stmt = stmts->pop ();
1374 gimple def_stmt0;
1375 tree oprnd0, oprnd1;
1376 tree type, half_type0;
1377 gimple pattern_stmt;
1378 tree vectype, vectype_out = NULL_TREE;
1379 tree var;
1380 enum tree_code dummy_code;
1381 int dummy_int;
1382 vec<tree> dummy_vec;
1383 gimple use_stmt;
1384 bool promotion;
1386 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1387 return NULL;
1389 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1390 return NULL;
1392 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1393 return NULL;
1395 oprnd0 = gimple_assign_rhs1 (last_stmt);
1396 oprnd1 = gimple_assign_rhs2 (last_stmt);
1397 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1398 return NULL;
1400 /* Check operand 0: it has to be defined by a type promotion. */
1401 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1402 &promotion)
1403 || !promotion)
1404 return NULL;
1406 /* Check operand 1: has to be positive. We check that it fits the type
1407 in vect_handle_widen_op_by_const (). */
1408 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1409 return NULL;
1411 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1412 type = gimple_expr_type (last_stmt);
1414 /* Check for subsequent conversion to another type. */
1415 use_stmt = vect_single_imm_use (last_stmt);
1416 if (use_stmt && is_gimple_assign (use_stmt)
1417 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1418 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1420 tree use_lhs = gimple_assign_lhs (use_stmt);
1421 tree use_type = TREE_TYPE (use_lhs);
1423 if (INTEGRAL_TYPE_P (use_type)
1424 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1426 last_stmt = use_stmt;
1427 type = use_type;
1431 /* Check if this a widening operation. */
1432 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1433 &oprnd0, stmts,
1434 type, &half_type0, def_stmt0))
1435 return NULL;
1437 /* Pattern detected. */
1438 if (dump_enabled_p ())
1439 dump_printf_loc (MSG_NOTE, vect_location,
1440 "vect_recog_widen_shift_pattern: detected:\n");
1442 /* Check target support. */
1443 vectype = get_vectype_for_scalar_type (half_type0);
1444 vectype_out = get_vectype_for_scalar_type (type);
1446 if (!vectype
1447 || !vectype_out
1448 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1449 vectype_out, vectype,
1450 &dummy_code, &dummy_code,
1451 &dummy_int, &dummy_vec))
1452 return NULL;
1454 *type_in = vectype;
1455 *type_out = vectype_out;
1457 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1458 var = vect_recog_temp_ssa_var (type, NULL);
1459 pattern_stmt =
1460 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1462 if (dump_enabled_p ())
1463 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1465 stmts->safe_push (last_stmt);
1466 return pattern_stmt;
1469 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1471 type a_t, b_t, c_t;
1473 S0 a_t = b_t r<< c_t;
1475 Input/Output:
1477 * STMTS: Contains a stmt from which the pattern search begins,
1478 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1479 with a sequence:
1481 S1 d_t = -c_t;
1482 S2 e_t = d_t & (B - 1);
1483 S3 f_t = b_t << c_t;
1484 S4 g_t = b_t >> e_t;
1485 S0 a_t = f_t | g_t;
1487 where B is element bitsize of type.
1489 Output:
1491 * TYPE_IN: The type of the input arguments to the pattern.
1493 * TYPE_OUT: The type of the output of this pattern.
1495 * Return value: A new stmt that will be used to replace the rotate
1496 S0 stmt. */
1498 static gimple
1499 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1501 gimple last_stmt = stmts->pop ();
1502 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1503 gimple pattern_stmt, def_stmt;
1504 enum tree_code rhs_code;
1505 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1506 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1507 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1508 enum vect_def_type dt;
1509 optab optab1, optab2;
1510 edge ext_def = NULL;
1512 if (!is_gimple_assign (last_stmt))
1513 return NULL;
1515 rhs_code = gimple_assign_rhs_code (last_stmt);
1516 switch (rhs_code)
1518 case LROTATE_EXPR:
1519 case RROTATE_EXPR:
1520 break;
1521 default:
1522 return NULL;
1525 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1526 return NULL;
1528 lhs = gimple_assign_lhs (last_stmt);
1529 oprnd0 = gimple_assign_rhs1 (last_stmt);
1530 type = TREE_TYPE (oprnd0);
1531 oprnd1 = gimple_assign_rhs2 (last_stmt);
1532 if (TREE_CODE (oprnd0) != SSA_NAME
1533 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1534 || !INTEGRAL_TYPE_P (type)
1535 || !TYPE_UNSIGNED (type))
1536 return NULL;
1538 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1539 &def, &dt))
1540 return NULL;
1542 if (dt != vect_internal_def
1543 && dt != vect_constant_def
1544 && dt != vect_external_def)
1545 return NULL;
1547 vectype = get_vectype_for_scalar_type (type);
1548 if (vectype == NULL_TREE)
1549 return NULL;
1551 /* If vector/vector or vector/scalar rotate is supported by the target,
1552 don't do anything here. */
1553 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1554 if (optab1
1555 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1556 return NULL;
1558 if (bb_vinfo != NULL || dt != vect_internal_def)
1560 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1561 if (optab2
1562 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1563 return NULL;
1566 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1567 don't do anything here either. */
1568 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1569 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1570 if (!optab1
1571 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1572 || !optab2
1573 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1575 if (bb_vinfo == NULL && dt == vect_internal_def)
1576 return NULL;
1577 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1578 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1579 if (!optab1
1580 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1581 || !optab2
1582 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1583 return NULL;
1586 *type_in = vectype;
1587 *type_out = vectype;
1588 if (*type_in == NULL_TREE)
1589 return NULL;
1591 if (dt == vect_external_def
1592 && TREE_CODE (oprnd1) == SSA_NAME
1593 && loop_vinfo)
1595 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1596 ext_def = loop_preheader_edge (loop);
1597 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1599 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1600 if (bb == NULL
1601 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1602 ext_def = NULL;
1606 def = NULL_TREE;
1607 if (TREE_CODE (oprnd1) == INTEGER_CST
1608 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1609 def = oprnd1;
1610 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1612 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1613 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1614 && TYPE_PRECISION (TREE_TYPE (rhs1))
1615 == TYPE_PRECISION (type))
1616 def = rhs1;
1619 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1620 if (def == NULL_TREE)
1622 def = vect_recog_temp_ssa_var (type, NULL);
1623 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1624 NULL_TREE);
1625 if (ext_def)
1627 basic_block new_bb
1628 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1629 gcc_assert (!new_bb);
1631 else
1632 append_pattern_def_seq (stmt_vinfo, def_stmt);
1634 stype = TREE_TYPE (def);
1636 if (TREE_CODE (def) == INTEGER_CST)
1638 if (!tree_fits_uhwi_p (def)
1639 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1640 || integer_zerop (def))
1641 return NULL;
1642 def2 = build_int_cst (stype,
1643 GET_MODE_PRECISION (TYPE_MODE (type))
1644 - tree_to_uhwi (def));
1646 else
1648 tree vecstype = get_vectype_for_scalar_type (stype);
1649 stmt_vec_info def_stmt_vinfo;
1651 if (vecstype == NULL_TREE)
1652 return NULL;
1653 def2 = vect_recog_temp_ssa_var (stype, NULL);
1654 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
1655 NULL_TREE);
1656 if (ext_def)
1658 basic_block new_bb
1659 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1660 gcc_assert (!new_bb);
1662 else
1664 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1665 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1666 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1667 append_pattern_def_seq (stmt_vinfo, def_stmt);
1670 def2 = vect_recog_temp_ssa_var (stype, NULL);
1671 tree mask
1672 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1673 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1674 gimple_assign_lhs (def_stmt),
1675 mask);
1676 if (ext_def)
1678 basic_block new_bb
1679 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1680 gcc_assert (!new_bb);
1682 else
1684 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1685 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1686 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1687 append_pattern_def_seq (stmt_vinfo, def_stmt);
1691 var1 = vect_recog_temp_ssa_var (type, NULL);
1692 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1693 ? LSHIFT_EXPR : RSHIFT_EXPR,
1694 var1, oprnd0, def);
1695 append_pattern_def_seq (stmt_vinfo, def_stmt);
1697 var2 = vect_recog_temp_ssa_var (type, NULL);
1698 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1699 ? RSHIFT_EXPR : LSHIFT_EXPR,
1700 var2, oprnd0, def2);
1701 append_pattern_def_seq (stmt_vinfo, def_stmt);
1703 /* Pattern detected. */
1704 if (dump_enabled_p ())
1705 dump_printf_loc (MSG_NOTE, vect_location,
1706 "vect_recog_rotate_pattern: detected:\n");
1708 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1709 var = vect_recog_temp_ssa_var (type, NULL);
1710 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
1712 if (dump_enabled_p ())
1713 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1715 stmts->safe_push (last_stmt);
1716 return pattern_stmt;
1719 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1720 vectorized:
1722 type a_t;
1723 TYPE b_T, res_T;
1725 S1 a_t = ;
1726 S2 b_T = ;
1727 S3 res_T = b_T op a_t;
1729 where type 'TYPE' is a type with different size than 'type',
1730 and op is <<, >> or rotate.
1732 Also detect cases:
1734 type a_t;
1735 TYPE b_T, c_T, res_T;
1737 S0 c_T = ;
1738 S1 a_t = (type) c_T;
1739 S2 b_T = ;
1740 S3 res_T = b_T op a_t;
1742 Input/Output:
1744 * STMTS: Contains a stmt from which the pattern search begins,
1745 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1746 with a shift/rotate which has same type on both operands, in the
1747 second case just b_T op c_T, in the first case with added cast
1748 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1750 Output:
1752 * TYPE_IN: The type of the input arguments to the pattern.
1754 * TYPE_OUT: The type of the output of this pattern.
1756 * Return value: A new stmt that will be used to replace the shift/rotate
1757 S3 stmt. */
1759 static gimple
1760 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
1761 tree *type_in, tree *type_out)
1763 gimple last_stmt = stmts->pop ();
1764 tree oprnd0, oprnd1, lhs, var;
1765 gimple pattern_stmt, def_stmt;
1766 enum tree_code rhs_code;
1767 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1768 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1769 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1770 enum vect_def_type dt;
1771 tree def;
1773 if (!is_gimple_assign (last_stmt))
1774 return NULL;
1776 rhs_code = gimple_assign_rhs_code (last_stmt);
1777 switch (rhs_code)
1779 case LSHIFT_EXPR:
1780 case RSHIFT_EXPR:
1781 case LROTATE_EXPR:
1782 case RROTATE_EXPR:
1783 break;
1784 default:
1785 return NULL;
1788 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1789 return NULL;
1791 lhs = gimple_assign_lhs (last_stmt);
1792 oprnd0 = gimple_assign_rhs1 (last_stmt);
1793 oprnd1 = gimple_assign_rhs2 (last_stmt);
1794 if (TREE_CODE (oprnd0) != SSA_NAME
1795 || TREE_CODE (oprnd1) != SSA_NAME
1796 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1797 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1798 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1799 || TYPE_PRECISION (TREE_TYPE (lhs))
1800 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1801 return NULL;
1803 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1804 &def, &dt))
1805 return NULL;
1807 if (dt != vect_internal_def)
1808 return NULL;
1810 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1811 *type_out = *type_in;
1812 if (*type_in == NULL_TREE)
1813 return NULL;
1815 def = NULL_TREE;
1816 if (gimple_assign_cast_p (def_stmt))
1818 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1819 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1820 && TYPE_PRECISION (TREE_TYPE (rhs1))
1821 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1822 def = rhs1;
1825 if (def == NULL_TREE)
1827 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1828 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1829 NULL_TREE);
1830 new_pattern_def_seq (stmt_vinfo, def_stmt);
1833 /* Pattern detected. */
1834 if (dump_enabled_p ())
1835 dump_printf_loc (MSG_NOTE, vect_location,
1836 "vect_recog_vector_vector_shift_pattern: detected:\n");
1838 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1839 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1840 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1842 if (dump_enabled_p ())
1843 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1845 stmts->safe_push (last_stmt);
1846 return pattern_stmt;
1849 /* Detect a signed division by a constant that wouldn't be
1850 otherwise vectorized:
1852 type a_t, b_t;
1854 S1 a_t = b_t / N;
1856 where type 'type' is an integral type and N is a constant.
1858 Similarly handle modulo by a constant:
1860 S4 a_t = b_t % N;
1862 Input/Output:
1864 * STMTS: Contains a stmt from which the pattern search begins,
1865 i.e. the division stmt. S1 is replaced by if N is a power
1866 of two constant and type is signed:
1867 S3 y_t = b_t < 0 ? N - 1 : 0;
1868 S2 x_t = b_t + y_t;
1869 S1' a_t = x_t >> log2 (N);
1871 S4 is replaced if N is a power of two constant and
1872 type is signed by (where *_T temporaries have unsigned type):
1873 S9 y_T = b_t < 0 ? -1U : 0U;
1874 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1875 S7 z_t = (type) z_T;
1876 S6 w_t = b_t + z_t;
1877 S5 x_t = w_t & (N - 1);
1878 S4' a_t = x_t - z_t;
1880 Output:
1882 * TYPE_IN: The type of the input arguments to the pattern.
1884 * TYPE_OUT: The type of the output of this pattern.
1886 * Return value: A new stmt that will be used to replace the division
1887 S1 or modulo S4 stmt. */
1889 static gimple
1890 vect_recog_divmod_pattern (vec<gimple> *stmts,
1891 tree *type_in, tree *type_out)
1893 gimple last_stmt = stmts->pop ();
1894 tree oprnd0, oprnd1, vectype, itype, cond;
1895 gimple pattern_stmt, def_stmt;
1896 enum tree_code rhs_code;
1897 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1898 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1899 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1900 optab optab;
1901 tree q;
1902 int dummy_int, prec;
1903 stmt_vec_info def_stmt_vinfo;
1905 if (!is_gimple_assign (last_stmt))
1906 return NULL;
1908 rhs_code = gimple_assign_rhs_code (last_stmt);
1909 switch (rhs_code)
1911 case TRUNC_DIV_EXPR:
1912 case TRUNC_MOD_EXPR:
1913 break;
1914 default:
1915 return NULL;
1918 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1919 return NULL;
1921 oprnd0 = gimple_assign_rhs1 (last_stmt);
1922 oprnd1 = gimple_assign_rhs2 (last_stmt);
1923 itype = TREE_TYPE (oprnd0);
1924 if (TREE_CODE (oprnd0) != SSA_NAME
1925 || TREE_CODE (oprnd1) != INTEGER_CST
1926 || TREE_CODE (itype) != INTEGER_TYPE
1927 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1928 return NULL;
1930 vectype = get_vectype_for_scalar_type (itype);
1931 if (vectype == NULL_TREE)
1932 return NULL;
1934 /* If the target can handle vectorized division or modulo natively,
1935 don't attempt to optimize this. */
1936 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1937 if (optab != unknown_optab)
1939 enum machine_mode vec_mode = TYPE_MODE (vectype);
1940 int icode = (int) optab_handler (optab, vec_mode);
1941 if (icode != CODE_FOR_nothing)
1942 return NULL;
1945 prec = TYPE_PRECISION (itype);
1946 if (integer_pow2p (oprnd1))
1948 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1949 return NULL;
1951 /* Pattern detected. */
1952 if (dump_enabled_p ())
1953 dump_printf_loc (MSG_NOTE, vect_location,
1954 "vect_recog_divmod_pattern: detected:\n");
1956 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1957 build_int_cst (itype, 0));
1958 if (rhs_code == TRUNC_DIV_EXPR)
1960 tree var = vect_recog_temp_ssa_var (itype, NULL);
1961 tree shift;
1962 def_stmt
1963 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1964 fold_build2 (MINUS_EXPR, itype,
1965 oprnd1,
1966 build_int_cst (itype,
1967 1)),
1968 build_int_cst (itype, 0));
1969 new_pattern_def_seq (stmt_vinfo, def_stmt);
1970 var = vect_recog_temp_ssa_var (itype, NULL);
1971 def_stmt
1972 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1973 gimple_assign_lhs (def_stmt));
1974 append_pattern_def_seq (stmt_vinfo, def_stmt);
1976 shift = build_int_cst (itype, tree_log2 (oprnd1));
1977 pattern_stmt
1978 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1979 vect_recog_temp_ssa_var (itype,
1980 NULL),
1981 var, shift);
1983 else
1985 tree signmask;
1986 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1987 if (compare_tree_int (oprnd1, 2) == 0)
1989 signmask = vect_recog_temp_ssa_var (itype, NULL);
1990 def_stmt
1991 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1992 build_int_cst (itype, 1),
1993 build_int_cst (itype, 0));
1994 append_pattern_def_seq (stmt_vinfo, def_stmt);
1996 else
1998 tree utype
1999 = build_nonstandard_integer_type (prec, 1);
2000 tree vecutype = get_vectype_for_scalar_type (utype);
2001 tree shift
2002 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2003 - tree_log2 (oprnd1));
2004 tree var = vect_recog_temp_ssa_var (utype, NULL);
2006 def_stmt
2007 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2008 build_int_cst (utype, -1),
2009 build_int_cst (utype, 0));
2010 def_stmt_vinfo
2011 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2012 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2013 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2014 append_pattern_def_seq (stmt_vinfo, def_stmt);
2015 var = vect_recog_temp_ssa_var (utype, NULL);
2016 def_stmt
2017 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2018 gimple_assign_lhs (def_stmt),
2019 shift);
2020 def_stmt_vinfo
2021 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2022 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2023 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2024 append_pattern_def_seq (stmt_vinfo, def_stmt);
2025 signmask = vect_recog_temp_ssa_var (itype, NULL);
2026 def_stmt
2027 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2028 NULL_TREE);
2029 append_pattern_def_seq (stmt_vinfo, def_stmt);
2031 def_stmt
2032 = gimple_build_assign_with_ops (PLUS_EXPR,
2033 vect_recog_temp_ssa_var (itype,
2034 NULL),
2035 oprnd0, signmask);
2036 append_pattern_def_seq (stmt_vinfo, def_stmt);
2037 def_stmt
2038 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2039 vect_recog_temp_ssa_var (itype,
2040 NULL),
2041 gimple_assign_lhs (def_stmt),
2042 fold_build2 (MINUS_EXPR, itype,
2043 oprnd1,
2044 build_int_cst (itype,
2045 1)));
2046 append_pattern_def_seq (stmt_vinfo, def_stmt);
2048 pattern_stmt
2049 = gimple_build_assign_with_ops (MINUS_EXPR,
2050 vect_recog_temp_ssa_var (itype,
2051 NULL),
2052 gimple_assign_lhs (def_stmt),
2053 signmask);
2056 if (dump_enabled_p ())
2057 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2060 stmts->safe_push (last_stmt);
2062 *type_in = vectype;
2063 *type_out = vectype;
2064 return pattern_stmt;
2067 if (prec > HOST_BITS_PER_WIDE_INT
2068 || integer_zerop (oprnd1))
2069 return NULL;
2071 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2072 return NULL;
2074 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2076 if (TYPE_UNSIGNED (itype))
2078 unsigned HOST_WIDE_INT mh, ml;
2079 int pre_shift, post_shift;
2080 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2081 & GET_MODE_MASK (TYPE_MODE (itype)));
2082 tree t1, t2, t3, t4;
2084 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2085 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2086 return NULL;
2088 /* Find a suitable multiplier and right shift count
2089 instead of multiplying with D. */
2090 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2092 /* If the suggested multiplier is more than SIZE bits, we can do better
2093 for even divisors, using an initial right shift. */
2094 if (mh != 0 && (d & 1) == 0)
2096 pre_shift = floor_log2 (d & -d);
2097 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2098 &ml, &post_shift, &dummy_int);
2099 gcc_assert (!mh);
2101 else
2102 pre_shift = 0;
2104 if (mh != 0)
2106 if (post_shift - 1 >= prec)
2107 return NULL;
2109 /* t1 = oprnd0 h* ml;
2110 t2 = oprnd0 - t1;
2111 t3 = t2 >> 1;
2112 t4 = t1 + t3;
2113 q = t4 >> (post_shift - 1); */
2114 t1 = vect_recog_temp_ssa_var (itype, NULL);
2115 def_stmt
2116 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2117 build_int_cst (itype, ml));
2118 append_pattern_def_seq (stmt_vinfo, def_stmt);
2120 t2 = vect_recog_temp_ssa_var (itype, NULL);
2121 def_stmt
2122 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2123 append_pattern_def_seq (stmt_vinfo, def_stmt);
2125 t3 = vect_recog_temp_ssa_var (itype, NULL);
2126 def_stmt
2127 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2128 integer_one_node);
2129 append_pattern_def_seq (stmt_vinfo, def_stmt);
2131 t4 = vect_recog_temp_ssa_var (itype, NULL);
2132 def_stmt
2133 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2135 if (post_shift != 1)
2137 append_pattern_def_seq (stmt_vinfo, def_stmt);
2139 q = vect_recog_temp_ssa_var (itype, NULL);
2140 pattern_stmt
2141 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2142 build_int_cst (itype,
2143 post_shift
2144 - 1));
2146 else
2148 q = t4;
2149 pattern_stmt = def_stmt;
2152 else
2154 if (pre_shift >= prec || post_shift >= prec)
2155 return NULL;
2157 /* t1 = oprnd0 >> pre_shift;
2158 t2 = t1 h* ml;
2159 q = t2 >> post_shift; */
2160 if (pre_shift)
2162 t1 = vect_recog_temp_ssa_var (itype, NULL);
2163 def_stmt
2164 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2165 build_int_cst (NULL,
2166 pre_shift));
2167 append_pattern_def_seq (stmt_vinfo, def_stmt);
2169 else
2170 t1 = oprnd0;
2172 t2 = vect_recog_temp_ssa_var (itype, NULL);
2173 def_stmt
2174 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2175 build_int_cst (itype, ml));
2177 if (post_shift)
2179 append_pattern_def_seq (stmt_vinfo, def_stmt);
2181 q = vect_recog_temp_ssa_var (itype, NULL);
2182 def_stmt
2183 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2184 build_int_cst (itype,
2185 post_shift));
2187 else
2188 q = t2;
2190 pattern_stmt = def_stmt;
2193 else
2195 unsigned HOST_WIDE_INT ml;
2196 int post_shift;
2197 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2198 unsigned HOST_WIDE_INT abs_d;
2199 bool add = false;
2200 tree t1, t2, t3, t4;
2202 /* Give up for -1. */
2203 if (d == -1)
2204 return NULL;
2206 /* Since d might be INT_MIN, we have to cast to
2207 unsigned HOST_WIDE_INT before negating to avoid
2208 undefined signed overflow. */
2209 abs_d = (d >= 0
2210 ? (unsigned HOST_WIDE_INT) d
2211 : - (unsigned HOST_WIDE_INT) d);
2213 /* n rem d = n rem -d */
2214 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2216 d = abs_d;
2217 oprnd1 = build_int_cst (itype, abs_d);
2219 else if (HOST_BITS_PER_WIDE_INT >= prec
2220 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2221 /* This case is not handled correctly below. */
2222 return NULL;
2224 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2225 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2227 add = true;
2228 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2230 if (post_shift >= prec)
2231 return NULL;
2233 /* t1 = oprnd0 h* ml; */
2234 t1 = vect_recog_temp_ssa_var (itype, NULL);
2235 def_stmt
2236 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2237 build_int_cst (itype, ml));
2239 if (add)
2241 /* t2 = t1 + oprnd0; */
2242 append_pattern_def_seq (stmt_vinfo, def_stmt);
2243 t2 = vect_recog_temp_ssa_var (itype, NULL);
2244 def_stmt
2245 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2247 else
2248 t2 = t1;
2250 if (post_shift)
2252 /* t3 = t2 >> post_shift; */
2253 append_pattern_def_seq (stmt_vinfo, def_stmt);
2254 t3 = vect_recog_temp_ssa_var (itype, NULL);
2255 def_stmt
2256 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2257 build_int_cst (itype, post_shift));
2259 else
2260 t3 = t2;
2262 double_int oprnd0_min, oprnd0_max;
2263 int msb = 1;
2264 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2266 if (!oprnd0_min.is_negative ())
2267 msb = 0;
2268 else if (oprnd0_max.is_negative ())
2269 msb = -1;
2272 if (msb == 0 && d >= 0)
2274 /* q = t3; */
2275 q = t3;
2276 pattern_stmt = def_stmt;
2278 else
2280 /* t4 = oprnd0 >> (prec - 1);
2281 or if we know from VRP that oprnd0 >= 0
2282 t4 = 0;
2283 or if we know from VRP that oprnd0 < 0
2284 t4 = -1; */
2285 append_pattern_def_seq (stmt_vinfo, def_stmt);
2286 t4 = vect_recog_temp_ssa_var (itype, NULL);
2287 if (msb != 1)
2288 def_stmt
2289 = gimple_build_assign_with_ops (INTEGER_CST,
2290 t4, build_int_cst (itype, msb),
2291 NULL_TREE);
2292 else
2293 def_stmt
2294 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2295 build_int_cst (itype, prec - 1));
2296 append_pattern_def_seq (stmt_vinfo, def_stmt);
2298 /* q = t3 - t4; or q = t4 - t3; */
2299 q = vect_recog_temp_ssa_var (itype, NULL);
2300 pattern_stmt
2301 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2302 d < 0 ? t3 : t4);
2306 if (rhs_code == TRUNC_MOD_EXPR)
2308 tree r, t1;
2310 /* We divided. Now finish by:
2311 t1 = q * oprnd1;
2312 r = oprnd0 - t1; */
2313 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2315 t1 = vect_recog_temp_ssa_var (itype, NULL);
2316 def_stmt
2317 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2318 append_pattern_def_seq (stmt_vinfo, def_stmt);
2320 r = vect_recog_temp_ssa_var (itype, NULL);
2321 pattern_stmt
2322 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2325 /* Pattern detected. */
2326 if (dump_enabled_p ())
2328 dump_printf_loc (MSG_NOTE, vect_location,
2329 "vect_recog_divmod_pattern: detected: ");
2330 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2331 dump_printf (MSG_NOTE, "\n");
2334 stmts->safe_push (last_stmt);
2336 *type_in = vectype;
2337 *type_out = vectype;
2338 return pattern_stmt;
2341 /* Function vect_recog_mixed_size_cond_pattern
2343 Try to find the following pattern:
2345 type x_t, y_t;
2346 TYPE a_T, b_T, c_T;
2347 loop:
2348 S1 a_T = x_t CMP y_t ? b_T : c_T;
2350 where type 'TYPE' is an integral type which has different size
2351 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2352 than 'type', the constants need to fit into an integer type
2353 with the same width as 'type') or results of conversion from 'type'.
2355 Input:
2357 * LAST_STMT: A stmt from which the pattern search begins.
2359 Output:
2361 * TYPE_IN: The type of the input arguments to the pattern.
2363 * TYPE_OUT: The type of the output of this pattern.
2365 * Return value: A new stmt that will be used to replace the pattern.
2366 Additionally a def_stmt is added.
2368 a_it = x_t CMP y_t ? b_it : c_it;
2369 a_T = (TYPE) a_it; */
2371 static gimple
2372 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2373 tree *type_out)
2375 gimple last_stmt = (*stmts)[0];
2376 tree cond_expr, then_clause, else_clause;
2377 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2378 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2379 enum machine_mode cmpmode;
2380 gimple pattern_stmt, def_stmt;
2381 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2382 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2383 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2384 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2385 bool promotion;
2386 tree comp_scalar_type;
2388 if (!is_gimple_assign (last_stmt)
2389 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2390 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2391 return NULL;
2393 cond_expr = gimple_assign_rhs1 (last_stmt);
2394 then_clause = gimple_assign_rhs2 (last_stmt);
2395 else_clause = gimple_assign_rhs3 (last_stmt);
2397 if (!COMPARISON_CLASS_P (cond_expr))
2398 return NULL;
2400 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2401 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2402 if (comp_vectype == NULL_TREE)
2403 return NULL;
2405 type = gimple_expr_type (last_stmt);
2406 if (types_compatible_p (type, comp_scalar_type)
2407 || ((TREE_CODE (then_clause) != INTEGER_CST
2408 || TREE_CODE (else_clause) != INTEGER_CST)
2409 && !INTEGRAL_TYPE_P (comp_scalar_type))
2410 || !INTEGRAL_TYPE_P (type))
2411 return NULL;
2413 if ((TREE_CODE (then_clause) != INTEGER_CST
2414 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2415 &def_stmt0, &promotion))
2416 || (TREE_CODE (else_clause) != INTEGER_CST
2417 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2418 &def_stmt1, &promotion)))
2419 return NULL;
2421 if (orig_type0 && orig_type1
2422 && !types_compatible_p (orig_type0, orig_type1))
2423 return NULL;
2425 if (orig_type0)
2427 if (!types_compatible_p (orig_type0, comp_scalar_type))
2428 return NULL;
2429 then_clause = gimple_assign_rhs1 (def_stmt0);
2430 itype = orig_type0;
2433 if (orig_type1)
2435 if (!types_compatible_p (orig_type1, comp_scalar_type))
2436 return NULL;
2437 else_clause = gimple_assign_rhs1 (def_stmt1);
2438 itype = orig_type1;
2441 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2443 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2444 return NULL;
2446 vectype = get_vectype_for_scalar_type (type);
2447 if (vectype == NULL_TREE)
2448 return NULL;
2450 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2451 return NULL;
2453 if (itype == NULL_TREE)
2454 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2455 TYPE_UNSIGNED (type));
2457 if (itype == NULL_TREE
2458 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2459 return NULL;
2461 vecitype = get_vectype_for_scalar_type (itype);
2462 if (vecitype == NULL_TREE)
2463 return NULL;
2465 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2466 return NULL;
2468 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2470 if ((TREE_CODE (then_clause) == INTEGER_CST
2471 && !int_fits_type_p (then_clause, itype))
2472 || (TREE_CODE (else_clause) == INTEGER_CST
2473 && !int_fits_type_p (else_clause, itype)))
2474 return NULL;
2477 def_stmt
2478 = gimple_build_assign_with_ops (COND_EXPR,
2479 vect_recog_temp_ssa_var (itype, NULL),
2480 unshare_expr (cond_expr),
2481 fold_convert (itype, then_clause),
2482 fold_convert (itype, else_clause));
2483 pattern_stmt
2484 = gimple_build_assign_with_ops (NOP_EXPR,
2485 vect_recog_temp_ssa_var (type, NULL),
2486 gimple_assign_lhs (def_stmt), NULL_TREE);
2488 new_pattern_def_seq (stmt_vinfo, def_stmt);
2489 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2490 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2491 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2492 *type_in = vecitype;
2493 *type_out = vectype;
2495 if (dump_enabled_p ())
2496 dump_printf_loc (MSG_NOTE, vect_location,
2497 "vect_recog_mixed_size_cond_pattern: detected:\n");
2499 return pattern_stmt;
2503 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2504 true if bool VAR can be optimized that way. */
2506 static bool
2507 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2509 gimple def_stmt;
2510 enum vect_def_type dt;
2511 tree def, rhs1;
2512 enum tree_code rhs_code;
2514 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2515 &dt))
2516 return false;
2518 if (dt != vect_internal_def)
2519 return false;
2521 if (!is_gimple_assign (def_stmt))
2522 return false;
2524 if (!has_single_use (def))
2525 return false;
2527 rhs1 = gimple_assign_rhs1 (def_stmt);
2528 rhs_code = gimple_assign_rhs_code (def_stmt);
2529 switch (rhs_code)
2531 case SSA_NAME:
2532 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2534 CASE_CONVERT:
2535 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2536 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2537 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2538 return false;
2539 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2541 case BIT_NOT_EXPR:
2542 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2544 case BIT_AND_EXPR:
2545 case BIT_IOR_EXPR:
2546 case BIT_XOR_EXPR:
2547 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2548 return false;
2549 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2550 bb_vinfo);
2552 default:
2553 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2555 tree vecitype, comp_vectype;
2557 /* If the comparison can throw, then is_gimple_condexpr will be
2558 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2559 if (stmt_could_throw_p (def_stmt))
2560 return false;
2562 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2563 if (comp_vectype == NULL_TREE)
2564 return false;
2566 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2568 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2569 tree itype
2570 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2571 vecitype = get_vectype_for_scalar_type (itype);
2572 if (vecitype == NULL_TREE)
2573 return false;
2575 else
2576 vecitype = comp_vectype;
2577 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2579 return false;
2584 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2585 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2586 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2588 static tree
2589 adjust_bool_pattern_cast (tree type, tree var)
2591 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2592 gimple cast_stmt, pattern_stmt;
2594 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2595 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2596 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2597 cast_stmt
2598 = gimple_build_assign_with_ops (NOP_EXPR,
2599 vect_recog_temp_ssa_var (type, NULL),
2600 gimple_assign_lhs (pattern_stmt),
2601 NULL_TREE);
2602 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2603 return gimple_assign_lhs (cast_stmt);
2607 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2608 recursively. VAR is an SSA_NAME that should be transformed from bool
2609 to a wider integer type, OUT_TYPE is the desired final integer type of
2610 the whole pattern, TRUEVAL should be NULL unless optimizing
2611 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2612 in the then_clause, STMTS is where statements with added pattern stmts
2613 should be pushed to. */
2615 static tree
2616 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2617 vec<gimple> *stmts)
2619 gimple stmt = SSA_NAME_DEF_STMT (var);
2620 enum tree_code rhs_code, def_rhs_code;
2621 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2622 location_t loc;
2623 gimple pattern_stmt, def_stmt;
2625 rhs1 = gimple_assign_rhs1 (stmt);
2626 rhs2 = gimple_assign_rhs2 (stmt);
2627 rhs_code = gimple_assign_rhs_code (stmt);
2628 loc = gimple_location (stmt);
2629 switch (rhs_code)
2631 case SSA_NAME:
2632 CASE_CONVERT:
2633 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2634 itype = TREE_TYPE (irhs1);
2635 pattern_stmt
2636 = gimple_build_assign_with_ops (SSA_NAME,
2637 vect_recog_temp_ssa_var (itype, NULL),
2638 irhs1, NULL_TREE);
2639 break;
2641 case BIT_NOT_EXPR:
2642 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2643 itype = TREE_TYPE (irhs1);
2644 pattern_stmt
2645 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2646 vect_recog_temp_ssa_var (itype, NULL),
2647 irhs1, build_int_cst (itype, 1));
2648 break;
2650 case BIT_AND_EXPR:
2651 /* Try to optimize x = y & (a < b ? 1 : 0); into
2652 x = (a < b ? y : 0);
2654 E.g. for:
2655 bool a_b, b_b, c_b;
2656 TYPE d_T;
2658 S1 a_b = x1 CMP1 y1;
2659 S2 b_b = x2 CMP2 y2;
2660 S3 c_b = a_b & b_b;
2661 S4 d_T = (TYPE) c_b;
2663 we would normally emit:
2665 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2666 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2667 S3' c_T = a_T & b_T;
2668 S4' d_T = c_T;
2670 but we can save one stmt by using the
2671 result of one of the COND_EXPRs in the other COND_EXPR and leave
2672 BIT_AND_EXPR stmt out:
2674 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2675 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2676 S4' f_T = c_T;
2678 At least when VEC_COND_EXPR is implemented using masks
2679 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2680 computes the comparison masks and ands it, in one case with
2681 all ones vector, in the other case with a vector register.
2682 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2683 often more expensive. */
2684 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2685 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2686 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2688 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2689 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2690 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2691 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2693 gimple tstmt;
2694 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2695 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2696 tstmt = stmts->pop ();
2697 gcc_assert (tstmt == def_stmt);
2698 stmts->quick_push (stmt);
2699 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2700 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2701 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2702 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2703 return irhs2;
2705 else
2706 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2707 goto and_ior_xor;
2709 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2710 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2711 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2713 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2714 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2715 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2716 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2718 gimple tstmt;
2719 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2720 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2721 tstmt = stmts->pop ();
2722 gcc_assert (tstmt == def_stmt);
2723 stmts->quick_push (stmt);
2724 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2725 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2726 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2727 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2728 return irhs1;
2730 else
2731 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2732 goto and_ior_xor;
2734 /* FALLTHRU */
2735 case BIT_IOR_EXPR:
2736 case BIT_XOR_EXPR:
2737 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2738 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2739 and_ior_xor:
2740 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2741 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2743 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2744 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2745 int out_prec = TYPE_PRECISION (out_type);
2746 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2747 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2748 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2749 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2750 else
2752 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2753 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2756 itype = TREE_TYPE (irhs1);
2757 pattern_stmt
2758 = gimple_build_assign_with_ops (rhs_code,
2759 vect_recog_temp_ssa_var (itype, NULL),
2760 irhs1, irhs2);
2761 break;
2763 default:
2764 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2765 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2766 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2767 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2768 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2770 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2771 itype
2772 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2774 else
2775 itype = TREE_TYPE (rhs1);
2776 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2777 if (trueval == NULL_TREE)
2778 trueval = build_int_cst (itype, 1);
2779 else
2780 gcc_checking_assert (useless_type_conversion_p (itype,
2781 TREE_TYPE (trueval)));
2782 pattern_stmt
2783 = gimple_build_assign_with_ops (COND_EXPR,
2784 vect_recog_temp_ssa_var (itype, NULL),
2785 cond_expr, trueval,
2786 build_int_cst (itype, 0));
2787 break;
2790 stmts->safe_push (stmt);
2791 gimple_set_location (pattern_stmt, loc);
2792 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2793 return gimple_assign_lhs (pattern_stmt);
2797 /* Function vect_recog_bool_pattern
2799 Try to find pattern like following:
2801 bool a_b, b_b, c_b, d_b, e_b;
2802 TYPE f_T;
2803 loop:
2804 S1 a_b = x1 CMP1 y1;
2805 S2 b_b = x2 CMP2 y2;
2806 S3 c_b = a_b & b_b;
2807 S4 d_b = x3 CMP3 y3;
2808 S5 e_b = c_b | d_b;
2809 S6 f_T = (TYPE) e_b;
2811 where type 'TYPE' is an integral type.
2813 Input:
2815 * LAST_STMT: A stmt at the end from which the pattern
2816 search begins, i.e. cast of a bool to
2817 an integer type.
2819 Output:
2821 * TYPE_IN: The type of the input arguments to the pattern.
2823 * TYPE_OUT: The type of the output of this pattern.
2825 * Return value: A new stmt that will be used to replace the pattern.
2827 Assuming size of TYPE is the same as size of all comparisons
2828 (otherwise some casts would be added where needed), the above
2829 sequence we create related pattern stmts:
2830 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2831 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2832 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2833 S5' e_T = c_T | d_T;
2834 S6' f_T = e_T;
2836 Instead of the above S3' we could emit:
2837 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2838 S3' c_T = a_T | b_T;
2839 but the above is more efficient. */
2841 static gimple
2842 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2843 tree *type_out)
2845 gimple last_stmt = stmts->pop ();
2846 enum tree_code rhs_code;
2847 tree var, lhs, rhs, vectype;
2848 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2849 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2850 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2851 gimple pattern_stmt;
2853 if (!is_gimple_assign (last_stmt))
2854 return NULL;
2856 var = gimple_assign_rhs1 (last_stmt);
2857 lhs = gimple_assign_lhs (last_stmt);
2859 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2860 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2861 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2862 return NULL;
2864 rhs_code = gimple_assign_rhs_code (last_stmt);
2865 if (CONVERT_EXPR_CODE_P (rhs_code))
2867 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2868 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2869 return NULL;
2870 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2871 if (vectype == NULL_TREE)
2872 return NULL;
2874 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2875 return NULL;
2877 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2878 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2879 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2880 pattern_stmt
2881 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2882 else
2883 pattern_stmt
2884 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2885 *type_out = vectype;
2886 *type_in = vectype;
2887 stmts->safe_push (last_stmt);
2888 if (dump_enabled_p ())
2889 dump_printf_loc (MSG_NOTE, vect_location,
2890 "vect_recog_bool_pattern: detected:\n");
2892 return pattern_stmt;
2894 else if (rhs_code == SSA_NAME
2895 && STMT_VINFO_DATA_REF (stmt_vinfo))
2897 stmt_vec_info pattern_stmt_info;
2898 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2899 gcc_assert (vectype != NULL_TREE);
2900 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2901 return NULL;
2902 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2903 return NULL;
2905 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2906 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2907 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2909 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2910 gimple cast_stmt
2911 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2912 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2913 rhs = rhs2;
2915 pattern_stmt
2916 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2917 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2918 bb_vinfo);
2919 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2920 STMT_VINFO_DATA_REF (pattern_stmt_info)
2921 = STMT_VINFO_DATA_REF (stmt_vinfo);
2922 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2923 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2924 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2925 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2926 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2927 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2928 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2929 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2930 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2931 *type_out = vectype;
2932 *type_in = vectype;
2933 stmts->safe_push (last_stmt);
2934 if (dump_enabled_p ())
2935 dump_printf_loc (MSG_NOTE, vect_location,
2936 "vect_recog_bool_pattern: detected:\n");
2937 return pattern_stmt;
2939 else
2940 return NULL;
2944 /* Mark statements that are involved in a pattern. */
2946 static inline void
2947 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2948 tree pattern_vectype)
2950 stmt_vec_info pattern_stmt_info, def_stmt_info;
2951 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2952 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2953 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2954 gimple def_stmt;
2956 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2957 if (pattern_stmt_info == NULL)
2959 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2960 bb_vinfo);
2961 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2963 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2965 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2966 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2967 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2968 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2969 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2970 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2971 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2972 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2973 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2975 gimple_stmt_iterator si;
2976 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2977 !gsi_end_p (si); gsi_next (&si))
2979 def_stmt = gsi_stmt (si);
2980 def_stmt_info = vinfo_for_stmt (def_stmt);
2981 if (def_stmt_info == NULL)
2983 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2984 bb_vinfo);
2985 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2987 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2988 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2989 STMT_VINFO_DEF_TYPE (def_stmt_info)
2990 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2991 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2992 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2997 /* Function vect_pattern_recog_1
2999 Input:
3000 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3001 computation pattern.
3002 STMT: A stmt from which the pattern search should start.
3004 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3005 expression that computes the same functionality and can be used to
3006 replace the sequence of stmts that are involved in the pattern.
3008 Output:
3009 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3010 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3011 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3012 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3013 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3014 to the available target pattern.
3016 This function also does some bookkeeping, as explained in the documentation
3017 for vect_recog_pattern. */
3019 static void
3020 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3021 gimple_stmt_iterator si,
3022 vec<gimple> *stmts_to_replace)
3024 gimple stmt = gsi_stmt (si), pattern_stmt;
3025 stmt_vec_info stmt_info;
3026 loop_vec_info loop_vinfo;
3027 tree pattern_vectype;
3028 tree type_in, type_out;
3029 enum tree_code code;
3030 int i;
3031 gimple next;
3033 stmts_to_replace->truncate (0);
3034 stmts_to_replace->quick_push (stmt);
3035 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3036 if (!pattern_stmt)
3037 return;
3039 stmt = stmts_to_replace->last ();
3040 stmt_info = vinfo_for_stmt (stmt);
3041 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3043 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3045 /* No need to check target support (already checked by the pattern
3046 recognition function). */
3047 pattern_vectype = type_out ? type_out : type_in;
3049 else
3051 enum machine_mode vec_mode;
3052 enum insn_code icode;
3053 optab optab;
3055 /* Check target support */
3056 type_in = get_vectype_for_scalar_type (type_in);
3057 if (!type_in)
3058 return;
3059 if (type_out)
3060 type_out = get_vectype_for_scalar_type (type_out);
3061 else
3062 type_out = type_in;
3063 if (!type_out)
3064 return;
3065 pattern_vectype = type_out;
3067 if (is_gimple_assign (pattern_stmt))
3068 code = gimple_assign_rhs_code (pattern_stmt);
3069 else
3071 gcc_assert (is_gimple_call (pattern_stmt));
3072 code = CALL_EXPR;
3075 optab = optab_for_tree_code (code, type_in, optab_default);
3076 vec_mode = TYPE_MODE (type_in);
3077 if (!optab
3078 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3079 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3080 return;
3083 /* Found a vectorizable pattern. */
3084 if (dump_enabled_p ())
3086 dump_printf_loc (MSG_NOTE, vect_location,
3087 "pattern recognized: ");
3088 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3089 dump_printf (MSG_NOTE, "\n");
3092 /* Mark the stmts that are involved in the pattern. */
3093 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3095 /* Patterns cannot be vectorized using SLP, because they change the order of
3096 computation. */
3097 if (loop_vinfo)
3098 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3099 if (next == stmt)
3100 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3102 /* It is possible that additional pattern stmts are created and inserted in
3103 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3104 relevant statements. */
3105 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3106 && (unsigned) i < (stmts_to_replace->length () - 1);
3107 i++)
3109 stmt_info = vinfo_for_stmt (stmt);
3110 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3111 if (dump_enabled_p ())
3113 dump_printf_loc (MSG_NOTE, vect_location,
3114 "additional pattern stmt: ");
3115 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3116 dump_printf (MSG_NOTE, "\n");
3119 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3124 /* Function vect_pattern_recog
3126 Input:
3127 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3128 computation idioms.
3130 Output - for each computation idiom that is detected we create a new stmt
3131 that provides the same functionality and that can be vectorized. We
3132 also record some information in the struct_stmt_info of the relevant
3133 stmts, as explained below:
3135 At the entry to this function we have the following stmts, with the
3136 following initial value in the STMT_VINFO fields:
3138 stmt in_pattern_p related_stmt vec_stmt
3139 S1: a_i = .... - - -
3140 S2: a_2 = ..use(a_i).. - - -
3141 S3: a_1 = ..use(a_2).. - - -
3142 S4: a_0 = ..use(a_1).. - - -
3143 S5: ... = ..use(a_0).. - - -
3145 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3146 represented by a single stmt. We then:
3147 - create a new stmt S6 equivalent to the pattern (the stmt is not
3148 inserted into the code)
3149 - fill in the STMT_VINFO fields as follows:
3151 in_pattern_p related_stmt vec_stmt
3152 S1: a_i = .... - - -
3153 S2: a_2 = ..use(a_i).. - - -
3154 S3: a_1 = ..use(a_2).. - - -
3155 S4: a_0 = ..use(a_1).. true S6 -
3156 '---> S6: a_new = .... - S4 -
3157 S5: ... = ..use(a_0).. - - -
3159 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3160 to each other through the RELATED_STMT field).
3162 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3163 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3164 remain irrelevant unless used by stmts other than S4.
3166 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3167 (because they are marked as irrelevant). It will vectorize S6, and record
3168 a pointer to the new vector stmt VS6 from S6 (as usual).
3169 S4 will be skipped, and S5 will be vectorized as usual:
3171 in_pattern_p related_stmt vec_stmt
3172 S1: a_i = .... - - -
3173 S2: a_2 = ..use(a_i).. - - -
3174 S3: a_1 = ..use(a_2).. - - -
3175 > VS6: va_new = .... - - -
3176 S4: a_0 = ..use(a_1).. true S6 VS6
3177 '---> S6: a_new = .... - S4 VS6
3178 > VS5: ... = ..vuse(va_new).. - - -
3179 S5: ... = ..use(a_0).. - - -
3181 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3182 elsewhere), and we'll end up with:
3184 VS6: va_new = ....
3185 VS5: ... = ..vuse(va_new)..
3187 In case of more than one pattern statements, e.g., widen-mult with
3188 intermediate type:
3190 S1 a_t = ;
3191 S2 a_T = (TYPE) a_t;
3192 '--> S3: a_it = (interm_type) a_t;
3193 S4 prod_T = a_T * CONST;
3194 '--> S5: prod_T' = a_it w* CONST;
3196 there may be other users of a_T outside the pattern. In that case S2 will
3197 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3198 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3199 be recorded in S3. */
3201 void
3202 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3204 struct loop *loop;
3205 basic_block *bbs;
3206 unsigned int nbbs;
3207 gimple_stmt_iterator si;
3208 unsigned int i, j;
3209 vect_recog_func_ptr vect_recog_func;
3210 stack_vec<gimple, 1> stmts_to_replace;
3211 gimple stmt;
3213 if (dump_enabled_p ())
3214 dump_printf_loc (MSG_NOTE, vect_location,
3215 "=== vect_pattern_recog ===\n");
3217 if (loop_vinfo)
3219 loop = LOOP_VINFO_LOOP (loop_vinfo);
3220 bbs = LOOP_VINFO_BBS (loop_vinfo);
3221 nbbs = loop->num_nodes;
3223 else
3225 bbs = &BB_VINFO_BB (bb_vinfo);
3226 nbbs = 1;
3229 /* Scan through the loop stmts, applying the pattern recognition
3230 functions starting at each stmt visited: */
3231 for (i = 0; i < nbbs; i++)
3233 basic_block bb = bbs[i];
3234 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3236 if (bb_vinfo && (stmt = gsi_stmt (si))
3237 && vinfo_for_stmt (stmt)
3238 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3239 continue;
3241 /* Scan over all generic vect_recog_xxx_pattern functions. */
3242 for (j = 0; j < NUM_PATTERNS; j++)
3244 vect_recog_func = vect_vect_recog_func_ptrs[j];
3245 vect_pattern_recog_1 (vect_recog_func, si,
3246 &stmts_to_replace);