Daily bump.
[official-gcc.git] / gcc / tree-vect-patterns.c
blob5db023fc40ad4d4dd2659d77b6a682f6a23d1ec0
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "stor-layout.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-ssa-alias.h"
31 #include "internal-fn.h"
32 #include "tree-eh.h"
33 #include "gimple-expr.h"
34 #include "is-a.h"
35 #include "gimple.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "gimple-ssa.h"
39 #include "tree-phinodes.h"
40 #include "ssa-iterators.h"
41 #include "stringpool.h"
42 #include "tree-ssanames.h"
43 #include "cfgloop.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "params.h"
47 #include "tree-data-ref.h"
48 #include "tree-vectorizer.h"
49 #include "recog.h" /* FIXME: for insn_data */
50 #include "diagnostic-core.h"
51 #include "dumpfile.h"
53 /* Pattern recognition functions */
54 static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
55 tree *);
56 static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
57 tree *);
58 static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
59 tree *);
60 static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
61 static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
62 tree *);
63 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
64 tree *, tree *);
65 static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
66 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
67 tree *, tree *);
68 static gimple vect_recog_divmod_pattern (vec<gimple> *,
69 tree *, tree *);
70 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
71 tree *, tree *);
72 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
73 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
74 vect_recog_widen_mult_pattern,
75 vect_recog_widen_sum_pattern,
76 vect_recog_dot_prod_pattern,
77 vect_recog_pow_pattern,
78 vect_recog_widen_shift_pattern,
79 vect_recog_over_widening_pattern,
80 vect_recog_rotate_pattern,
81 vect_recog_vector_vector_shift_pattern,
82 vect_recog_divmod_pattern,
83 vect_recog_mixed_size_cond_pattern,
84 vect_recog_bool_pattern};
86 static inline void
87 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
89 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
90 stmt);
93 static inline void
94 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
96 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
97 append_pattern_def_seq (stmt_info, stmt);
100 /* Check whether STMT2 is in the same loop or basic block as STMT1.
101 Which of the two applies depends on whether we're currently doing
102 loop-based or basic-block-based vectorization, as determined by
103 the vinfo_for_stmt for STMT1 (which must be defined).
105 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
106 to be defined as well. */
108 static bool
109 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
111 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
113 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
115 if (!gimple_bb (stmt2))
116 return false;
118 if (loop_vinfo)
120 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
121 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
122 return false;
124 else
126 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
127 || gimple_code (stmt2) == GIMPLE_PHI)
128 return false;
131 gcc_assert (vinfo_for_stmt (stmt2));
132 return true;
135 /* If the LHS of DEF_STMT has a single use, and that statement is
136 in the same loop or basic block, return it. */
138 static gimple
139 vect_single_imm_use (gimple def_stmt)
141 tree lhs = gimple_assign_lhs (def_stmt);
142 use_operand_p use_p;
143 gimple use_stmt;
145 if (!single_imm_use (lhs, &use_p, &use_stmt))
146 return NULL;
148 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
149 return NULL;
151 return use_stmt;
154 /* Check whether NAME, an ssa-name used in USE_STMT,
155 is a result of a type promotion or demotion, such that:
156 DEF_STMT: NAME = NOP (name0)
157 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
158 If CHECK_SIGN is TRUE, check that either both types are signed or both are
159 unsigned. */
161 static bool
162 type_conversion_p (tree name, gimple use_stmt, bool check_sign,
163 tree *orig_type, gimple *def_stmt, bool *promotion)
165 tree dummy;
166 gimple dummy_gimple;
167 loop_vec_info loop_vinfo;
168 stmt_vec_info stmt_vinfo;
169 tree type = TREE_TYPE (name);
170 tree oprnd0;
171 enum vect_def_type dt;
172 tree def;
173 bb_vec_info bb_vinfo;
175 stmt_vinfo = vinfo_for_stmt (use_stmt);
176 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
177 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
178 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
179 &def, &dt))
180 return false;
182 if (dt != vect_internal_def
183 && dt != vect_external_def && dt != vect_constant_def)
184 return false;
186 if (!*def_stmt)
187 return false;
189 if (!is_gimple_assign (*def_stmt))
190 return false;
192 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
193 return false;
195 oprnd0 = gimple_assign_rhs1 (*def_stmt);
197 *orig_type = TREE_TYPE (oprnd0);
198 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
199 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
200 return false;
202 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
203 *promotion = true;
204 else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
205 *promotion = false;
206 else
207 return false;
209 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
210 bb_vinfo, &dummy_gimple, &dummy, &dt))
211 return false;
213 return true;
216 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
217 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
219 static tree
220 vect_recog_temp_ssa_var (tree type, gimple stmt)
222 return make_temp_ssa_name (type, stmt, "patt");
225 /* Function vect_recog_dot_prod_pattern
227 Try to find the following pattern:
229 type x_t, y_t;
230 TYPE1 prod;
231 TYPE2 sum = init;
232 loop:
233 sum_0 = phi <init, sum_1>
234 S1 x_t = ...
235 S2 y_t = ...
236 S3 x_T = (TYPE1) x_t;
237 S4 y_T = (TYPE1) y_t;
238 S5 prod = x_T * y_T;
239 [S6 prod = (TYPE2) prod; #optional]
240 S7 sum_1 = prod + sum_0;
242 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
243 same size of 'TYPE1' or bigger. This is a special case of a reduction
244 computation.
246 Input:
248 * STMTS: Contains a stmt from which the pattern search begins. In the
249 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
250 will be detected.
252 Output:
254 * TYPE_IN: The type of the input arguments to the pattern.
256 * TYPE_OUT: The type of the output of this pattern.
258 * Return value: A new stmt that will be used to replace the sequence of
259 stmts that constitute the pattern. In this case it will be:
260 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
262 Note: The dot-prod idiom is a widening reduction pattern that is
263 vectorized without preserving all the intermediate results. It
264 produces only N/2 (widened) results (by summing up pairs of
265 intermediate results) rather than all N results. Therefore, we
266 cannot allow this pattern when we want to get all the results and in
267 the correct order (as is the case when this computation is in an
268 inner-loop nested in an outer-loop that us being vectorized). */
270 static gimple
271 vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
272 tree *type_out)
274 gimple stmt, last_stmt = (*stmts)[0];
275 tree oprnd0, oprnd1;
276 tree oprnd00, oprnd01;
277 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
278 tree type, half_type;
279 gimple pattern_stmt;
280 tree prod_type;
281 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
282 struct loop *loop;
283 tree var;
284 bool promotion;
286 if (!loop_info)
287 return NULL;
289 loop = LOOP_VINFO_LOOP (loop_info);
291 if (!is_gimple_assign (last_stmt))
292 return NULL;
294 type = gimple_expr_type (last_stmt);
296 /* Look for the following pattern
297 DX = (TYPE1) X;
298 DY = (TYPE1) Y;
299 DPROD = DX * DY;
300 DDPROD = (TYPE2) DPROD;
301 sum_1 = DDPROD + sum_0;
302 In which
303 - DX is double the size of X
304 - DY is double the size of Y
305 - DX, DY, DPROD all have the same type
306 - sum is the same size of DPROD or bigger
307 - sum has been recognized as a reduction variable.
309 This is equivalent to:
310 DPROD = X w* Y; #widen mult
311 sum_1 = DPROD w+ sum_0; #widen summation
313 DPROD = X w* Y; #widen mult
314 sum_1 = DPROD + sum_0; #summation
317 /* Starting from LAST_STMT, follow the defs of its uses in search
318 of the above pattern. */
320 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
321 return NULL;
323 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
325 /* Has been detected as widening-summation? */
327 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
328 type = gimple_expr_type (stmt);
329 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
330 return NULL;
331 oprnd0 = gimple_assign_rhs1 (stmt);
332 oprnd1 = gimple_assign_rhs2 (stmt);
333 half_type = TREE_TYPE (oprnd0);
335 else
337 gimple def_stmt;
339 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
340 return NULL;
341 oprnd0 = gimple_assign_rhs1 (last_stmt);
342 oprnd1 = gimple_assign_rhs2 (last_stmt);
343 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
344 || !types_compatible_p (TREE_TYPE (oprnd1), type))
345 return NULL;
346 stmt = last_stmt;
348 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
349 &promotion)
350 && promotion)
352 stmt = def_stmt;
353 oprnd0 = gimple_assign_rhs1 (stmt);
355 else
356 half_type = type;
359 /* So far so good. Since last_stmt was detected as a (summation) reduction,
360 we know that oprnd1 is the reduction variable (defined by a loop-header
361 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
362 Left to check that oprnd0 is defined by a (widen_)mult_expr */
363 if (TREE_CODE (oprnd0) != SSA_NAME)
364 return NULL;
366 prod_type = half_type;
367 stmt = SSA_NAME_DEF_STMT (oprnd0);
369 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
370 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
371 return NULL;
373 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
374 inside the loop (in case we are analyzing an outer-loop). */
375 if (!is_gimple_assign (stmt))
376 return NULL;
377 stmt_vinfo = vinfo_for_stmt (stmt);
378 gcc_assert (stmt_vinfo);
379 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
380 return NULL;
381 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
382 return NULL;
383 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
385 /* Has been detected as a widening multiplication? */
387 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
388 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
389 return NULL;
390 stmt_vinfo = vinfo_for_stmt (stmt);
391 gcc_assert (stmt_vinfo);
392 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
393 oprnd00 = gimple_assign_rhs1 (stmt);
394 oprnd01 = gimple_assign_rhs2 (stmt);
396 else
398 tree half_type0, half_type1;
399 gimple def_stmt;
400 tree oprnd0, oprnd1;
402 oprnd0 = gimple_assign_rhs1 (stmt);
403 oprnd1 = gimple_assign_rhs2 (stmt);
404 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
405 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
406 return NULL;
407 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
408 &promotion)
409 || !promotion)
410 return NULL;
411 oprnd00 = gimple_assign_rhs1 (def_stmt);
412 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
413 &promotion)
414 || !promotion)
415 return NULL;
416 oprnd01 = gimple_assign_rhs1 (def_stmt);
417 if (!types_compatible_p (half_type0, half_type1))
418 return NULL;
419 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
420 return NULL;
423 half_type = TREE_TYPE (oprnd00);
424 *type_in = half_type;
425 *type_out = type;
427 /* Pattern detected. Create a stmt to be used to replace the pattern: */
428 var = vect_recog_temp_ssa_var (type, NULL);
429 pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
430 oprnd00, oprnd01, oprnd1);
432 if (dump_enabled_p ())
434 dump_printf_loc (MSG_NOTE, vect_location,
435 "vect_recog_dot_prod_pattern: detected: ");
436 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
437 dump_printf (MSG_NOTE, "\n");
440 /* We don't allow changing the order of the computation in the inner-loop
441 when doing outer-loop vectorization. */
442 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
444 return pattern_stmt;
448 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
449 and LSHIFT_EXPR.
451 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
452 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
454 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
455 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
456 that satisfies the above restrictions, we can perform a widening opeartion
457 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
458 with a_it = (interm_type) a_t; */
460 static bool
461 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
462 tree const_oprnd, tree *oprnd,
463 vec<gimple> *stmts, tree type,
464 tree *half_type, gimple def_stmt)
466 tree new_type, new_oprnd;
467 gimple new_stmt;
469 if (code != MULT_EXPR && code != LSHIFT_EXPR)
470 return false;
472 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
473 || (code == LSHIFT_EXPR
474 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
475 != 1))
476 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
478 /* CONST_OPRND is a constant of HALF_TYPE. */
479 *oprnd = gimple_assign_rhs1 (def_stmt);
480 return true;
483 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
484 return false;
486 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
487 return false;
489 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
490 a type 2 times bigger than HALF_TYPE. */
491 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
492 TYPE_UNSIGNED (type));
493 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
494 || (code == LSHIFT_EXPR
495 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
496 return false;
498 /* Use NEW_TYPE for widening operation. */
499 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
501 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
502 /* Check if the already created pattern stmt is what we need. */
503 if (!is_gimple_assign (new_stmt)
504 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
505 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
506 return false;
508 stmts->safe_push (def_stmt);
509 *oprnd = gimple_assign_lhs (new_stmt);
511 else
513 /* Create a_T = (NEW_TYPE) a_t; */
514 *oprnd = gimple_assign_rhs1 (def_stmt);
515 new_oprnd = make_ssa_name (new_type, NULL);
516 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
517 NULL_TREE);
518 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
519 stmts->safe_push (def_stmt);
520 *oprnd = new_oprnd;
523 *half_type = new_type;
524 return true;
528 /* Function vect_recog_widen_mult_pattern
530 Try to find the following pattern:
532 type a_t, b_t;
533 TYPE a_T, b_T, prod_T;
535 S1 a_t = ;
536 S2 b_t = ;
537 S3 a_T = (TYPE) a_t;
538 S4 b_T = (TYPE) b_t;
539 S5 prod_T = a_T * b_T;
541 where type 'TYPE' is at least double the size of type 'type'.
543 Also detect unsigned cases:
545 unsigned type a_t, b_t;
546 unsigned TYPE u_prod_T;
547 TYPE a_T, b_T, prod_T;
549 S1 a_t = ;
550 S2 b_t = ;
551 S3 a_T = (TYPE) a_t;
552 S4 b_T = (TYPE) b_t;
553 S5 prod_T = a_T * b_T;
554 S6 u_prod_T = (unsigned TYPE) prod_T;
556 and multiplication by constants:
558 type a_t;
559 TYPE a_T, prod_T;
561 S1 a_t = ;
562 S3 a_T = (TYPE) a_t;
563 S5 prod_T = a_T * CONST;
565 A special case of multiplication by constants is when 'TYPE' is 4 times
566 bigger than 'type', but CONST fits an intermediate type 2 times smaller
567 than 'TYPE'. In that case we create an additional pattern stmt for S3
568 to create a variable of the intermediate type, and perform widen-mult
569 on the intermediate type as well:
571 type a_t;
572 interm_type a_it;
573 TYPE a_T, prod_T, prod_T';
575 S1 a_t = ;
576 S3 a_T = (TYPE) a_t;
577 '--> a_it = (interm_type) a_t;
578 S5 prod_T = a_T * CONST;
579 '--> prod_T' = a_it w* CONST;
581 Input/Output:
583 * STMTS: Contains a stmt from which the pattern search begins. In the
584 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
585 is detected. In case of unsigned widen-mult, the original stmt (S5) is
586 replaced with S6 in STMTS. In case of multiplication by a constant
587 of an intermediate type (the last case above), STMTS also contains S3
588 (inserted before S5).
590 Output:
592 * TYPE_IN: The type of the input arguments to the pattern.
594 * TYPE_OUT: The type of the output of this pattern.
596 * Return value: A new stmt that will be used to replace the sequence of
597 stmts that constitute the pattern. In this case it will be:
598 WIDEN_MULT <a_t, b_t>
601 static gimple
602 vect_recog_widen_mult_pattern (vec<gimple> *stmts,
603 tree *type_in, tree *type_out)
605 gimple last_stmt = stmts->pop ();
606 gimple def_stmt0, def_stmt1;
607 tree oprnd0, oprnd1;
608 tree type, half_type0, half_type1;
609 gimple pattern_stmt;
610 tree vectype, vectype_out = NULL_TREE;
611 tree var;
612 enum tree_code dummy_code;
613 int dummy_int;
614 vec<tree> dummy_vec;
615 bool op1_ok;
616 bool promotion;
618 if (!is_gimple_assign (last_stmt))
619 return NULL;
621 type = gimple_expr_type (last_stmt);
623 /* Starting from LAST_STMT, follow the defs of its uses in search
624 of the above pattern. */
626 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
627 return NULL;
629 oprnd0 = gimple_assign_rhs1 (last_stmt);
630 oprnd1 = gimple_assign_rhs2 (last_stmt);
631 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
632 || !types_compatible_p (TREE_TYPE (oprnd1), type))
633 return NULL;
635 /* Check argument 0. */
636 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
637 &promotion)
638 || !promotion)
639 return NULL;
640 /* Check argument 1. */
641 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
642 &def_stmt1, &promotion);
644 if (op1_ok && promotion)
646 oprnd0 = gimple_assign_rhs1 (def_stmt0);
647 oprnd1 = gimple_assign_rhs1 (def_stmt1);
649 else
651 if (TREE_CODE (oprnd1) == INTEGER_CST
652 && TREE_CODE (half_type0) == INTEGER_TYPE
653 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
654 &oprnd0, stmts, type,
655 &half_type0, def_stmt0))
657 half_type1 = half_type0;
658 oprnd1 = fold_convert (half_type1, oprnd1);
660 else
661 return NULL;
664 /* Handle unsigned case. Look for
665 S6 u_prod_T = (unsigned TYPE) prod_T;
666 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
667 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
669 gimple use_stmt;
670 tree use_lhs;
671 tree use_type;
673 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
674 return NULL;
676 use_stmt = vect_single_imm_use (last_stmt);
677 if (!use_stmt || !is_gimple_assign (use_stmt)
678 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
679 return NULL;
681 use_lhs = gimple_assign_lhs (use_stmt);
682 use_type = TREE_TYPE (use_lhs);
683 if (!INTEGRAL_TYPE_P (use_type)
684 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
685 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
686 return NULL;
688 type = use_type;
689 last_stmt = use_stmt;
692 if (!types_compatible_p (half_type0, half_type1))
693 return NULL;
695 /* Pattern detected. */
696 if (dump_enabled_p ())
697 dump_printf_loc (MSG_NOTE, vect_location,
698 "vect_recog_widen_mult_pattern: detected:\n");
700 /* Check target support */
701 vectype = get_vectype_for_scalar_type (half_type0);
702 vectype_out = get_vectype_for_scalar_type (type);
703 if (!vectype
704 || !vectype_out
705 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
706 vectype_out, vectype,
707 &dummy_code, &dummy_code,
708 &dummy_int, &dummy_vec))
709 return NULL;
711 *type_in = vectype;
712 *type_out = vectype_out;
714 /* Pattern supported. Create a stmt to be used to replace the pattern: */
715 var = vect_recog_temp_ssa_var (type, NULL);
716 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
717 oprnd1);
719 if (dump_enabled_p ())
720 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
722 stmts->safe_push (last_stmt);
723 return pattern_stmt;
727 /* Function vect_recog_pow_pattern
729 Try to find the following pattern:
731 x = POW (y, N);
733 with POW being one of pow, powf, powi, powif and N being
734 either 2 or 0.5.
736 Input:
738 * LAST_STMT: A stmt from which the pattern search begins.
740 Output:
742 * TYPE_IN: The type of the input arguments to the pattern.
744 * TYPE_OUT: The type of the output of this pattern.
746 * Return value: A new stmt that will be used to replace the sequence of
747 stmts that constitute the pattern. In this case it will be:
748 x = x * x
750 x = sqrt (x)
753 static gimple
754 vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
755 tree *type_out)
757 gimple last_stmt = (*stmts)[0];
758 tree fn, base, exp = NULL;
759 gimple stmt;
760 tree var;
762 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
763 return NULL;
765 fn = gimple_call_fndecl (last_stmt);
766 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
767 return NULL;
769 switch (DECL_FUNCTION_CODE (fn))
771 case BUILT_IN_POWIF:
772 case BUILT_IN_POWI:
773 case BUILT_IN_POWF:
774 case BUILT_IN_POW:
775 base = gimple_call_arg (last_stmt, 0);
776 exp = gimple_call_arg (last_stmt, 1);
777 if (TREE_CODE (exp) != REAL_CST
778 && TREE_CODE (exp) != INTEGER_CST)
779 return NULL;
780 break;
782 default:
783 return NULL;
786 /* We now have a pow or powi builtin function call with a constant
787 exponent. */
789 *type_out = NULL_TREE;
791 /* Catch squaring. */
792 if ((tree_fits_shwi_p (exp)
793 && tree_to_shwi (exp) == 2)
794 || (TREE_CODE (exp) == REAL_CST
795 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
797 *type_in = TREE_TYPE (base);
799 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
800 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
801 return stmt;
804 /* Catch square root. */
805 if (TREE_CODE (exp) == REAL_CST
806 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
808 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
809 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
810 if (*type_in)
812 gimple stmt = gimple_build_call (newfn, 1, base);
813 if (vectorizable_function (stmt, *type_in, *type_in)
814 != NULL_TREE)
816 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
817 gimple_call_set_lhs (stmt, var);
818 return stmt;
823 return NULL;
827 /* Function vect_recog_widen_sum_pattern
829 Try to find the following pattern:
831 type x_t;
832 TYPE x_T, sum = init;
833 loop:
834 sum_0 = phi <init, sum_1>
835 S1 x_t = *p;
836 S2 x_T = (TYPE) x_t;
837 S3 sum_1 = x_T + sum_0;
839 where type 'TYPE' is at least double the size of type 'type', i.e - we're
840 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
841 a special case of a reduction computation.
843 Input:
845 * LAST_STMT: A stmt from which the pattern search begins. In the example,
846 when this function is called with S3, the pattern {S2,S3} will be detected.
848 Output:
850 * TYPE_IN: The type of the input arguments to the pattern.
852 * TYPE_OUT: The type of the output of this pattern.
854 * Return value: A new stmt that will be used to replace the sequence of
855 stmts that constitute the pattern. In this case it will be:
856 WIDEN_SUM <x_t, sum_0>
858 Note: The widening-sum idiom is a widening reduction pattern that is
859 vectorized without preserving all the intermediate results. It
860 produces only N/2 (widened) results (by summing up pairs of
861 intermediate results) rather than all N results. Therefore, we
862 cannot allow this pattern when we want to get all the results and in
863 the correct order (as is the case when this computation is in an
864 inner-loop nested in an outer-loop that us being vectorized). */
866 static gimple
867 vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
868 tree *type_out)
870 gimple stmt, last_stmt = (*stmts)[0];
871 tree oprnd0, oprnd1;
872 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
873 tree type, half_type;
874 gimple pattern_stmt;
875 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
876 struct loop *loop;
877 tree var;
878 bool promotion;
880 if (!loop_info)
881 return NULL;
883 loop = LOOP_VINFO_LOOP (loop_info);
885 if (!is_gimple_assign (last_stmt))
886 return NULL;
888 type = gimple_expr_type (last_stmt);
890 /* Look for the following pattern
891 DX = (TYPE) X;
892 sum_1 = DX + sum_0;
893 In which DX is at least double the size of X, and sum_1 has been
894 recognized as a reduction variable.
897 /* Starting from LAST_STMT, follow the defs of its uses in search
898 of the above pattern. */
900 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
901 return NULL;
903 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
904 return NULL;
906 oprnd0 = gimple_assign_rhs1 (last_stmt);
907 oprnd1 = gimple_assign_rhs2 (last_stmt);
908 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
909 || !types_compatible_p (TREE_TYPE (oprnd1), type))
910 return NULL;
912 /* So far so good. Since last_stmt was detected as a (summation) reduction,
913 we know that oprnd1 is the reduction variable (defined by a loop-header
914 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
915 Left to check that oprnd0 is defined by a cast from type 'type' to type
916 'TYPE'. */
918 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
919 &promotion)
920 || !promotion)
921 return NULL;
923 oprnd0 = gimple_assign_rhs1 (stmt);
924 *type_in = half_type;
925 *type_out = type;
927 /* Pattern detected. Create a stmt to be used to replace the pattern: */
928 var = vect_recog_temp_ssa_var (type, NULL);
929 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
930 oprnd0, oprnd1);
932 if (dump_enabled_p ())
934 dump_printf_loc (MSG_NOTE, vect_location,
935 "vect_recog_widen_sum_pattern: detected: ");
936 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
937 dump_printf (MSG_NOTE, "\n");
940 /* We don't allow changing the order of the computation in the inner-loop
941 when doing outer-loop vectorization. */
942 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
944 return pattern_stmt;
948 /* Return TRUE if the operation in STMT can be performed on a smaller type.
950 Input:
951 STMT - a statement to check.
952 DEF - we support operations with two operands, one of which is constant.
953 The other operand can be defined by a demotion operation, or by a
954 previous statement in a sequence of over-promoted operations. In the
955 later case DEF is used to replace that operand. (It is defined by a
956 pattern statement we created for the previous statement in the
957 sequence).
959 Input/output:
960 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
961 NULL, it's the type of DEF.
962 STMTS - additional pattern statements. If a pattern statement (type
963 conversion) is created in this function, its original statement is
964 added to STMTS.
966 Output:
967 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
968 operands to use in the new pattern statement for STMT (will be created
969 in vect_recog_over_widening_pattern ()).
970 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
971 statements for STMT: the first one is a type promotion and the second
972 one is the operation itself. We return the type promotion statement
973 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
974 the second pattern statement. */
976 static bool
977 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
978 tree *op0, tree *op1, gimple *new_def_stmt,
979 vec<gimple> *stmts)
981 enum tree_code code;
982 tree const_oprnd, oprnd;
983 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
984 gimple def_stmt, new_stmt;
985 bool first = false;
986 bool promotion;
988 *op0 = NULL_TREE;
989 *op1 = NULL_TREE;
990 *new_def_stmt = NULL;
992 if (!is_gimple_assign (stmt))
993 return false;
995 code = gimple_assign_rhs_code (stmt);
996 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
997 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
998 return false;
1000 oprnd = gimple_assign_rhs1 (stmt);
1001 const_oprnd = gimple_assign_rhs2 (stmt);
1002 type = gimple_expr_type (stmt);
1004 if (TREE_CODE (oprnd) != SSA_NAME
1005 || TREE_CODE (const_oprnd) != INTEGER_CST)
1006 return false;
1008 /* If oprnd has other uses besides that in stmt we cannot mark it
1009 as being part of a pattern only. */
1010 if (!has_single_use (oprnd))
1011 return false;
1013 /* If we are in the middle of a sequence, we use DEF from a previous
1014 statement. Otherwise, OPRND has to be a result of type promotion. */
1015 if (*new_type)
1017 half_type = *new_type;
1018 oprnd = def;
1020 else
1022 first = true;
1023 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1024 &promotion)
1025 || !promotion
1026 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1027 return false;
1030 /* Can we perform the operation on a smaller type? */
1031 switch (code)
1033 case BIT_IOR_EXPR:
1034 case BIT_XOR_EXPR:
1035 case BIT_AND_EXPR:
1036 if (!int_fits_type_p (const_oprnd, half_type))
1038 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1039 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1040 return false;
1042 interm_type = build_nonstandard_integer_type (
1043 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1044 if (!int_fits_type_p (const_oprnd, interm_type))
1045 return false;
1048 break;
1050 case LSHIFT_EXPR:
1051 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1052 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1053 return false;
1055 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1056 (e.g., if the original value was char, the shift amount is at most 8
1057 if we want to use short). */
1058 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1059 return false;
1061 interm_type = build_nonstandard_integer_type (
1062 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1064 if (!vect_supportable_shift (code, interm_type))
1065 return false;
1067 break;
1069 case RSHIFT_EXPR:
1070 if (vect_supportable_shift (code, half_type))
1071 break;
1073 /* Try intermediate type - HALF_TYPE is not supported. */
1074 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1075 return false;
1077 interm_type = build_nonstandard_integer_type (
1078 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1080 if (!vect_supportable_shift (code, interm_type))
1081 return false;
1083 break;
1085 default:
1086 gcc_unreachable ();
1089 /* There are four possible cases:
1090 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1091 the first statement in the sequence)
1092 a. The original, HALF_TYPE, is not enough - we replace the promotion
1093 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1094 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1095 promotion.
1096 2. OPRND is defined by a pattern statement we created.
1097 a. Its type is not sufficient for the operation, we create a new stmt:
1098 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1099 this statement in NEW_DEF_STMT, and it is later put in
1100 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1101 b. OPRND is good to use in the new statement. */
1102 if (first)
1104 if (interm_type)
1106 /* Replace the original type conversion HALF_TYPE->TYPE with
1107 HALF_TYPE->INTERM_TYPE. */
1108 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1110 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1111 /* Check if the already created pattern stmt is what we need. */
1112 if (!is_gimple_assign (new_stmt)
1113 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1114 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1115 return false;
1117 stmts->safe_push (def_stmt);
1118 oprnd = gimple_assign_lhs (new_stmt);
1120 else
1122 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1123 oprnd = gimple_assign_rhs1 (def_stmt);
1124 new_oprnd = make_ssa_name (interm_type, NULL);
1125 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1126 oprnd, NULL_TREE);
1127 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1128 stmts->safe_push (def_stmt);
1129 oprnd = new_oprnd;
1132 else
1134 /* Retrieve the operand before the type promotion. */
1135 oprnd = gimple_assign_rhs1 (def_stmt);
1138 else
1140 if (interm_type)
1142 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1143 new_oprnd = make_ssa_name (interm_type, NULL);
1144 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1145 oprnd, NULL_TREE);
1146 oprnd = new_oprnd;
1147 *new_def_stmt = new_stmt;
1150 /* Otherwise, OPRND is already set. */
1153 if (interm_type)
1154 *new_type = interm_type;
1155 else
1156 *new_type = half_type;
1158 *op0 = oprnd;
1159 *op1 = fold_convert (*new_type, const_oprnd);
1161 return true;
1165 /* Try to find a statement or a sequence of statements that can be performed
1166 on a smaller type:
1168 type x_t;
1169 TYPE x_T, res0_T, res1_T;
1170 loop:
1171 S1 x_t = *p;
1172 S2 x_T = (TYPE) x_t;
1173 S3 res0_T = op (x_T, C0);
1174 S4 res1_T = op (res0_T, C1);
1175 S5 ... = () res1_T; - type demotion
1177 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1178 constants.
1179 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1180 be 'type' or some intermediate type. For now, we expect S5 to be a type
1181 demotion operation. We also check that S3 and S4 have only one use. */
1183 static gimple
1184 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1185 tree *type_in, tree *type_out)
1187 gimple stmt = stmts->pop ();
1188 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1189 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1190 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1191 bool first;
1192 tree type = NULL;
1194 first = true;
1195 while (1)
1197 if (!vinfo_for_stmt (stmt)
1198 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1199 return NULL;
1201 new_def_stmt = NULL;
1202 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1203 &op0, &op1, &new_def_stmt,
1204 stmts))
1206 if (first)
1207 return NULL;
1208 else
1209 break;
1212 /* STMT can be performed on a smaller type. Check its uses. */
1213 use_stmt = vect_single_imm_use (stmt);
1214 if (!use_stmt || !is_gimple_assign (use_stmt))
1215 return NULL;
1217 /* Create pattern statement for STMT. */
1218 vectype = get_vectype_for_scalar_type (new_type);
1219 if (!vectype)
1220 return NULL;
1222 /* We want to collect all the statements for which we create pattern
1223 statetments, except for the case when the last statement in the
1224 sequence doesn't have a corresponding pattern statement. In such
1225 case we associate the last pattern statement with the last statement
1226 in the sequence. Therefore, we only add the original statement to
1227 the list if we know that it is not the last. */
1228 if (prev_stmt)
1229 stmts->safe_push (prev_stmt);
1231 var = vect_recog_temp_ssa_var (new_type, NULL);
1232 pattern_stmt
1233 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1234 op0, op1);
1235 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1236 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1238 if (dump_enabled_p ())
1240 dump_printf_loc (MSG_NOTE, vect_location,
1241 "created pattern stmt: ");
1242 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1243 dump_printf (MSG_NOTE, "\n");
1246 type = gimple_expr_type (stmt);
1247 prev_stmt = stmt;
1248 stmt = use_stmt;
1250 first = false;
1253 /* We got a sequence. We expect it to end with a type demotion operation.
1254 Otherwise, we quit (for now). There are three possible cases: the
1255 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1256 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1257 NEW_TYPE differs (we create a new conversion statement). */
1258 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1260 use_lhs = gimple_assign_lhs (use_stmt);
1261 use_type = TREE_TYPE (use_lhs);
1262 /* Support only type demotion or signedess change. */
1263 if (!INTEGRAL_TYPE_P (use_type)
1264 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1265 return NULL;
1267 /* Check that NEW_TYPE is not bigger than the conversion result. */
1268 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1269 return NULL;
1271 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1272 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1274 /* Create NEW_TYPE->USE_TYPE conversion. */
1275 new_oprnd = make_ssa_name (use_type, NULL);
1276 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1277 var, NULL_TREE);
1278 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1280 *type_in = get_vectype_for_scalar_type (new_type);
1281 *type_out = get_vectype_for_scalar_type (use_type);
1283 /* We created a pattern statement for the last statement in the
1284 sequence, so we don't need to associate it with the pattern
1285 statement created for PREV_STMT. Therefore, we add PREV_STMT
1286 to the list in order to mark it later in vect_pattern_recog_1. */
1287 if (prev_stmt)
1288 stmts->safe_push (prev_stmt);
1290 else
1292 if (prev_stmt)
1293 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1294 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1296 *type_in = vectype;
1297 *type_out = NULL_TREE;
1300 stmts->safe_push (use_stmt);
1302 else
1303 /* TODO: support general case, create a conversion to the correct type. */
1304 return NULL;
1306 /* Pattern detected. */
1307 if (dump_enabled_p ())
1309 dump_printf_loc (MSG_NOTE, vect_location,
1310 "vect_recog_over_widening_pattern: detected: ");
1311 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1312 dump_printf (MSG_NOTE, "\n");
1315 return pattern_stmt;
1318 /* Detect widening shift pattern:
1320 type a_t;
1321 TYPE a_T, res_T;
1323 S1 a_t = ;
1324 S2 a_T = (TYPE) a_t;
1325 S3 res_T = a_T << CONST;
1327 where type 'TYPE' is at least double the size of type 'type'.
1329 Also detect cases where the shift result is immediately converted
1330 to another type 'result_type' that is no larger in size than 'TYPE'.
1331 In those cases we perform a widen-shift that directly results in
1332 'result_type', to avoid a possible over-widening situation:
1334 type a_t;
1335 TYPE a_T, res_T;
1336 result_type res_result;
1338 S1 a_t = ;
1339 S2 a_T = (TYPE) a_t;
1340 S3 res_T = a_T << CONST;
1341 S4 res_result = (result_type) res_T;
1342 '--> res_result' = a_t w<< CONST;
1344 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1345 create an additional pattern stmt for S2 to create a variable of an
1346 intermediate type, and perform widen-shift on the intermediate type:
1348 type a_t;
1349 interm_type a_it;
1350 TYPE a_T, res_T, res_T';
1352 S1 a_t = ;
1353 S2 a_T = (TYPE) a_t;
1354 '--> a_it = (interm_type) a_t;
1355 S3 res_T = a_T << CONST;
1356 '--> res_T' = a_it <<* CONST;
1358 Input/Output:
1360 * STMTS: Contains a stmt from which the pattern search begins.
1361 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1362 in STMTS. When an intermediate type is used and a pattern statement is
1363 created for S2, we also put S2 here (before S3).
1365 Output:
1367 * TYPE_IN: The type of the input arguments to the pattern.
1369 * TYPE_OUT: The type of the output of this pattern.
1371 * Return value: A new stmt that will be used to replace the sequence of
1372 stmts that constitute the pattern. In this case it will be:
1373 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1375 static gimple
1376 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1377 tree *type_in, tree *type_out)
1379 gimple last_stmt = stmts->pop ();
1380 gimple def_stmt0;
1381 tree oprnd0, oprnd1;
1382 tree type, half_type0;
1383 gimple pattern_stmt;
1384 tree vectype, vectype_out = NULL_TREE;
1385 tree var;
1386 enum tree_code dummy_code;
1387 int dummy_int;
1388 vec<tree> dummy_vec;
1389 gimple use_stmt;
1390 bool promotion;
1392 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1393 return NULL;
1395 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1396 return NULL;
1398 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1399 return NULL;
1401 oprnd0 = gimple_assign_rhs1 (last_stmt);
1402 oprnd1 = gimple_assign_rhs2 (last_stmt);
1403 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1404 return NULL;
1406 /* Check operand 0: it has to be defined by a type promotion. */
1407 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1408 &promotion)
1409 || !promotion)
1410 return NULL;
1412 /* Check operand 1: has to be positive. We check that it fits the type
1413 in vect_handle_widen_op_by_const (). */
1414 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1415 return NULL;
1417 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1418 type = gimple_expr_type (last_stmt);
1420 /* Check for subsequent conversion to another type. */
1421 use_stmt = vect_single_imm_use (last_stmt);
1422 if (use_stmt && is_gimple_assign (use_stmt)
1423 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1424 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1426 tree use_lhs = gimple_assign_lhs (use_stmt);
1427 tree use_type = TREE_TYPE (use_lhs);
1429 if (INTEGRAL_TYPE_P (use_type)
1430 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1432 last_stmt = use_stmt;
1433 type = use_type;
1437 /* Check if this a widening operation. */
1438 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1439 &oprnd0, stmts,
1440 type, &half_type0, def_stmt0))
1441 return NULL;
1443 /* Pattern detected. */
1444 if (dump_enabled_p ())
1445 dump_printf_loc (MSG_NOTE, vect_location,
1446 "vect_recog_widen_shift_pattern: detected:\n");
1448 /* Check target support. */
1449 vectype = get_vectype_for_scalar_type (half_type0);
1450 vectype_out = get_vectype_for_scalar_type (type);
1452 if (!vectype
1453 || !vectype_out
1454 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1455 vectype_out, vectype,
1456 &dummy_code, &dummy_code,
1457 &dummy_int, &dummy_vec))
1458 return NULL;
1460 *type_in = vectype;
1461 *type_out = vectype_out;
1463 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1464 var = vect_recog_temp_ssa_var (type, NULL);
1465 pattern_stmt =
1466 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1468 if (dump_enabled_p ())
1469 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1471 stmts->safe_push (last_stmt);
1472 return pattern_stmt;
1475 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1477 type a_t, b_t, c_t;
1479 S0 a_t = b_t r<< c_t;
1481 Input/Output:
1483 * STMTS: Contains a stmt from which the pattern search begins,
1484 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1485 with a sequence:
1487 S1 d_t = -c_t;
1488 S2 e_t = d_t & (B - 1);
1489 S3 f_t = b_t << c_t;
1490 S4 g_t = b_t >> e_t;
1491 S0 a_t = f_t | g_t;
1493 where B is element bitsize of type.
1495 Output:
1497 * TYPE_IN: The type of the input arguments to the pattern.
1499 * TYPE_OUT: The type of the output of this pattern.
1501 * Return value: A new stmt that will be used to replace the rotate
1502 S0 stmt. */
1504 static gimple
1505 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1507 gimple last_stmt = stmts->pop ();
1508 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1509 gimple pattern_stmt, def_stmt;
1510 enum tree_code rhs_code;
1511 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1512 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1513 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1514 enum vect_def_type dt;
1515 optab optab1, optab2;
1516 edge ext_def = NULL;
1518 if (!is_gimple_assign (last_stmt))
1519 return NULL;
1521 rhs_code = gimple_assign_rhs_code (last_stmt);
1522 switch (rhs_code)
1524 case LROTATE_EXPR:
1525 case RROTATE_EXPR:
1526 break;
1527 default:
1528 return NULL;
1531 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1532 return NULL;
1534 lhs = gimple_assign_lhs (last_stmt);
1535 oprnd0 = gimple_assign_rhs1 (last_stmt);
1536 type = TREE_TYPE (oprnd0);
1537 oprnd1 = gimple_assign_rhs2 (last_stmt);
1538 if (TREE_CODE (oprnd0) != SSA_NAME
1539 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1540 || !INTEGRAL_TYPE_P (type)
1541 || !TYPE_UNSIGNED (type))
1542 return NULL;
1544 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1545 &def, &dt))
1546 return NULL;
1548 if (dt != vect_internal_def
1549 && dt != vect_constant_def
1550 && dt != vect_external_def)
1551 return NULL;
1553 vectype = get_vectype_for_scalar_type (type);
1554 if (vectype == NULL_TREE)
1555 return NULL;
1557 /* If vector/vector or vector/scalar rotate is supported by the target,
1558 don't do anything here. */
1559 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1560 if (optab1
1561 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1562 return NULL;
1564 if (bb_vinfo != NULL || dt != vect_internal_def)
1566 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1567 if (optab2
1568 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1569 return NULL;
1572 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1573 don't do anything here either. */
1574 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1575 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1576 if (!optab1
1577 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1578 || !optab2
1579 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1581 if (bb_vinfo == NULL && dt == vect_internal_def)
1582 return NULL;
1583 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1584 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1585 if (!optab1
1586 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1587 || !optab2
1588 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1589 return NULL;
1592 *type_in = vectype;
1593 *type_out = vectype;
1594 if (*type_in == NULL_TREE)
1595 return NULL;
1597 if (dt == vect_external_def
1598 && TREE_CODE (oprnd1) == SSA_NAME
1599 && loop_vinfo)
1601 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1602 ext_def = loop_preheader_edge (loop);
1603 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1605 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1606 if (bb == NULL
1607 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1608 ext_def = NULL;
1612 def = NULL_TREE;
1613 if (TREE_CODE (oprnd1) == INTEGER_CST
1614 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1615 def = oprnd1;
1616 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1618 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1619 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1620 && TYPE_PRECISION (TREE_TYPE (rhs1))
1621 == TYPE_PRECISION (type))
1622 def = rhs1;
1625 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1626 if (def == NULL_TREE)
1628 def = vect_recog_temp_ssa_var (type, NULL);
1629 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1630 NULL_TREE);
1631 if (ext_def)
1633 basic_block new_bb
1634 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1635 gcc_assert (!new_bb);
1637 else
1638 append_pattern_def_seq (stmt_vinfo, def_stmt);
1640 stype = TREE_TYPE (def);
1642 if (TREE_CODE (def) == INTEGER_CST)
1644 if (!tree_fits_uhwi_p (def)
1645 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1646 || integer_zerop (def))
1647 return NULL;
1648 def2 = build_int_cst (stype,
1649 GET_MODE_PRECISION (TYPE_MODE (type))
1650 - tree_to_uhwi (def));
1652 else
1654 tree vecstype = get_vectype_for_scalar_type (stype);
1655 stmt_vec_info def_stmt_vinfo;
1657 if (vecstype == NULL_TREE)
1658 return NULL;
1659 def2 = vect_recog_temp_ssa_var (stype, NULL);
1660 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
1661 NULL_TREE);
1662 if (ext_def)
1664 basic_block new_bb
1665 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1666 gcc_assert (!new_bb);
1668 else
1670 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1671 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1672 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1673 append_pattern_def_seq (stmt_vinfo, def_stmt);
1676 def2 = vect_recog_temp_ssa_var (stype, NULL);
1677 tree mask
1678 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1679 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1680 gimple_assign_lhs (def_stmt),
1681 mask);
1682 if (ext_def)
1684 basic_block new_bb
1685 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1686 gcc_assert (!new_bb);
1688 else
1690 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1691 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1692 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1693 append_pattern_def_seq (stmt_vinfo, def_stmt);
1697 var1 = vect_recog_temp_ssa_var (type, NULL);
1698 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1699 ? LSHIFT_EXPR : RSHIFT_EXPR,
1700 var1, oprnd0, def);
1701 append_pattern_def_seq (stmt_vinfo, def_stmt);
1703 var2 = vect_recog_temp_ssa_var (type, NULL);
1704 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1705 ? RSHIFT_EXPR : LSHIFT_EXPR,
1706 var2, oprnd0, def2);
1707 append_pattern_def_seq (stmt_vinfo, def_stmt);
1709 /* Pattern detected. */
1710 if (dump_enabled_p ())
1711 dump_printf_loc (MSG_NOTE, vect_location,
1712 "vect_recog_rotate_pattern: detected:\n");
1714 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1715 var = vect_recog_temp_ssa_var (type, NULL);
1716 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
1718 if (dump_enabled_p ())
1719 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1721 stmts->safe_push (last_stmt);
1722 return pattern_stmt;
1725 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1726 vectorized:
1728 type a_t;
1729 TYPE b_T, res_T;
1731 S1 a_t = ;
1732 S2 b_T = ;
1733 S3 res_T = b_T op a_t;
1735 where type 'TYPE' is a type with different size than 'type',
1736 and op is <<, >> or rotate.
1738 Also detect cases:
1740 type a_t;
1741 TYPE b_T, c_T, res_T;
1743 S0 c_T = ;
1744 S1 a_t = (type) c_T;
1745 S2 b_T = ;
1746 S3 res_T = b_T op a_t;
1748 Input/Output:
1750 * STMTS: Contains a stmt from which the pattern search begins,
1751 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1752 with a shift/rotate which has same type on both operands, in the
1753 second case just b_T op c_T, in the first case with added cast
1754 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1756 Output:
1758 * TYPE_IN: The type of the input arguments to the pattern.
1760 * TYPE_OUT: The type of the output of this pattern.
1762 * Return value: A new stmt that will be used to replace the shift/rotate
1763 S3 stmt. */
1765 static gimple
1766 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
1767 tree *type_in, tree *type_out)
1769 gimple last_stmt = stmts->pop ();
1770 tree oprnd0, oprnd1, lhs, var;
1771 gimple pattern_stmt, def_stmt;
1772 enum tree_code rhs_code;
1773 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1774 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1775 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1776 enum vect_def_type dt;
1777 tree def;
1779 if (!is_gimple_assign (last_stmt))
1780 return NULL;
1782 rhs_code = gimple_assign_rhs_code (last_stmt);
1783 switch (rhs_code)
1785 case LSHIFT_EXPR:
1786 case RSHIFT_EXPR:
1787 case LROTATE_EXPR:
1788 case RROTATE_EXPR:
1789 break;
1790 default:
1791 return NULL;
1794 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1795 return NULL;
1797 lhs = gimple_assign_lhs (last_stmt);
1798 oprnd0 = gimple_assign_rhs1 (last_stmt);
1799 oprnd1 = gimple_assign_rhs2 (last_stmt);
1800 if (TREE_CODE (oprnd0) != SSA_NAME
1801 || TREE_CODE (oprnd1) != SSA_NAME
1802 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1803 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1804 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1805 || TYPE_PRECISION (TREE_TYPE (lhs))
1806 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1807 return NULL;
1809 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1810 &def, &dt))
1811 return NULL;
1813 if (dt != vect_internal_def)
1814 return NULL;
1816 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1817 *type_out = *type_in;
1818 if (*type_in == NULL_TREE)
1819 return NULL;
1821 def = NULL_TREE;
1822 if (gimple_assign_cast_p (def_stmt))
1824 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1825 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1826 && TYPE_PRECISION (TREE_TYPE (rhs1))
1827 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1828 def = rhs1;
1831 if (def == NULL_TREE)
1833 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1834 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1835 NULL_TREE);
1836 new_pattern_def_seq (stmt_vinfo, def_stmt);
1839 /* Pattern detected. */
1840 if (dump_enabled_p ())
1841 dump_printf_loc (MSG_NOTE, vect_location,
1842 "vect_recog_vector_vector_shift_pattern: detected:\n");
1844 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1845 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1846 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1848 if (dump_enabled_p ())
1849 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1851 stmts->safe_push (last_stmt);
1852 return pattern_stmt;
1855 /* Detect a signed division by a constant that wouldn't be
1856 otherwise vectorized:
1858 type a_t, b_t;
1860 S1 a_t = b_t / N;
1862 where type 'type' is an integral type and N is a constant.
1864 Similarly handle modulo by a constant:
1866 S4 a_t = b_t % N;
1868 Input/Output:
1870 * STMTS: Contains a stmt from which the pattern search begins,
1871 i.e. the division stmt. S1 is replaced by if N is a power
1872 of two constant and type is signed:
1873 S3 y_t = b_t < 0 ? N - 1 : 0;
1874 S2 x_t = b_t + y_t;
1875 S1' a_t = x_t >> log2 (N);
1877 S4 is replaced if N is a power of two constant and
1878 type is signed by (where *_T temporaries have unsigned type):
1879 S9 y_T = b_t < 0 ? -1U : 0U;
1880 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1881 S7 z_t = (type) z_T;
1882 S6 w_t = b_t + z_t;
1883 S5 x_t = w_t & (N - 1);
1884 S4' a_t = x_t - z_t;
1886 Output:
1888 * TYPE_IN: The type of the input arguments to the pattern.
1890 * TYPE_OUT: The type of the output of this pattern.
1892 * Return value: A new stmt that will be used to replace the division
1893 S1 or modulo S4 stmt. */
1895 static gimple
1896 vect_recog_divmod_pattern (vec<gimple> *stmts,
1897 tree *type_in, tree *type_out)
1899 gimple last_stmt = stmts->pop ();
1900 tree oprnd0, oprnd1, vectype, itype, cond;
1901 gimple pattern_stmt, def_stmt;
1902 enum tree_code rhs_code;
1903 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1904 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1905 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1906 optab optab;
1907 tree q;
1908 int dummy_int, prec;
1909 stmt_vec_info def_stmt_vinfo;
1911 if (!is_gimple_assign (last_stmt))
1912 return NULL;
1914 rhs_code = gimple_assign_rhs_code (last_stmt);
1915 switch (rhs_code)
1917 case TRUNC_DIV_EXPR:
1918 case TRUNC_MOD_EXPR:
1919 break;
1920 default:
1921 return NULL;
1924 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1925 return NULL;
1927 oprnd0 = gimple_assign_rhs1 (last_stmt);
1928 oprnd1 = gimple_assign_rhs2 (last_stmt);
1929 itype = TREE_TYPE (oprnd0);
1930 if (TREE_CODE (oprnd0) != SSA_NAME
1931 || TREE_CODE (oprnd1) != INTEGER_CST
1932 || TREE_CODE (itype) != INTEGER_TYPE
1933 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1934 return NULL;
1936 vectype = get_vectype_for_scalar_type (itype);
1937 if (vectype == NULL_TREE)
1938 return NULL;
1940 /* If the target can handle vectorized division or modulo natively,
1941 don't attempt to optimize this. */
1942 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1943 if (optab != unknown_optab)
1945 enum machine_mode vec_mode = TYPE_MODE (vectype);
1946 int icode = (int) optab_handler (optab, vec_mode);
1947 if (icode != CODE_FOR_nothing)
1948 return NULL;
1951 prec = TYPE_PRECISION (itype);
1952 if (integer_pow2p (oprnd1))
1954 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1955 return NULL;
1957 /* Pattern detected. */
1958 if (dump_enabled_p ())
1959 dump_printf_loc (MSG_NOTE, vect_location,
1960 "vect_recog_divmod_pattern: detected:\n");
1962 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1963 build_int_cst (itype, 0));
1964 if (rhs_code == TRUNC_DIV_EXPR)
1966 tree var = vect_recog_temp_ssa_var (itype, NULL);
1967 tree shift;
1968 def_stmt
1969 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1970 fold_build2 (MINUS_EXPR, itype,
1971 oprnd1,
1972 build_int_cst (itype,
1973 1)),
1974 build_int_cst (itype, 0));
1975 new_pattern_def_seq (stmt_vinfo, def_stmt);
1976 var = vect_recog_temp_ssa_var (itype, NULL);
1977 def_stmt
1978 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1979 gimple_assign_lhs (def_stmt));
1980 append_pattern_def_seq (stmt_vinfo, def_stmt);
1982 shift = build_int_cst (itype, tree_log2 (oprnd1));
1983 pattern_stmt
1984 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1985 vect_recog_temp_ssa_var (itype,
1986 NULL),
1987 var, shift);
1989 else
1991 tree signmask;
1992 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1993 if (compare_tree_int (oprnd1, 2) == 0)
1995 signmask = vect_recog_temp_ssa_var (itype, NULL);
1996 def_stmt
1997 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1998 build_int_cst (itype, 1),
1999 build_int_cst (itype, 0));
2000 append_pattern_def_seq (stmt_vinfo, def_stmt);
2002 else
2004 tree utype
2005 = build_nonstandard_integer_type (prec, 1);
2006 tree vecutype = get_vectype_for_scalar_type (utype);
2007 tree shift
2008 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2009 - tree_log2 (oprnd1));
2010 tree var = vect_recog_temp_ssa_var (utype, NULL);
2012 def_stmt
2013 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2014 build_int_cst (utype, -1),
2015 build_int_cst (utype, 0));
2016 def_stmt_vinfo
2017 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2018 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2019 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2020 append_pattern_def_seq (stmt_vinfo, def_stmt);
2021 var = vect_recog_temp_ssa_var (utype, NULL);
2022 def_stmt
2023 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2024 gimple_assign_lhs (def_stmt),
2025 shift);
2026 def_stmt_vinfo
2027 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2028 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2029 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2030 append_pattern_def_seq (stmt_vinfo, def_stmt);
2031 signmask = vect_recog_temp_ssa_var (itype, NULL);
2032 def_stmt
2033 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2034 NULL_TREE);
2035 append_pattern_def_seq (stmt_vinfo, def_stmt);
2037 def_stmt
2038 = gimple_build_assign_with_ops (PLUS_EXPR,
2039 vect_recog_temp_ssa_var (itype,
2040 NULL),
2041 oprnd0, signmask);
2042 append_pattern_def_seq (stmt_vinfo, def_stmt);
2043 def_stmt
2044 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2045 vect_recog_temp_ssa_var (itype,
2046 NULL),
2047 gimple_assign_lhs (def_stmt),
2048 fold_build2 (MINUS_EXPR, itype,
2049 oprnd1,
2050 build_int_cst (itype,
2051 1)));
2052 append_pattern_def_seq (stmt_vinfo, def_stmt);
2054 pattern_stmt
2055 = gimple_build_assign_with_ops (MINUS_EXPR,
2056 vect_recog_temp_ssa_var (itype,
2057 NULL),
2058 gimple_assign_lhs (def_stmt),
2059 signmask);
2062 if (dump_enabled_p ())
2063 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2066 stmts->safe_push (last_stmt);
2068 *type_in = vectype;
2069 *type_out = vectype;
2070 return pattern_stmt;
2073 if (prec > HOST_BITS_PER_WIDE_INT
2074 || integer_zerop (oprnd1))
2075 return NULL;
2077 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2078 return NULL;
2080 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2082 if (TYPE_UNSIGNED (itype))
2084 unsigned HOST_WIDE_INT mh, ml;
2085 int pre_shift, post_shift;
2086 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2087 & GET_MODE_MASK (TYPE_MODE (itype)));
2088 tree t1, t2, t3, t4;
2090 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2091 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2092 return NULL;
2094 /* Find a suitable multiplier and right shift count
2095 instead of multiplying with D. */
2096 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2098 /* If the suggested multiplier is more than SIZE bits, we can do better
2099 for even divisors, using an initial right shift. */
2100 if (mh != 0 && (d & 1) == 0)
2102 pre_shift = floor_log2 (d & -d);
2103 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2104 &ml, &post_shift, &dummy_int);
2105 gcc_assert (!mh);
2107 else
2108 pre_shift = 0;
2110 if (mh != 0)
2112 if (post_shift - 1 >= prec)
2113 return NULL;
2115 /* t1 = oprnd0 h* ml;
2116 t2 = oprnd0 - t1;
2117 t3 = t2 >> 1;
2118 t4 = t1 + t3;
2119 q = t4 >> (post_shift - 1); */
2120 t1 = vect_recog_temp_ssa_var (itype, NULL);
2121 def_stmt
2122 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2123 build_int_cst (itype, ml));
2124 append_pattern_def_seq (stmt_vinfo, def_stmt);
2126 t2 = vect_recog_temp_ssa_var (itype, NULL);
2127 def_stmt
2128 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2129 append_pattern_def_seq (stmt_vinfo, def_stmt);
2131 t3 = vect_recog_temp_ssa_var (itype, NULL);
2132 def_stmt
2133 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2134 integer_one_node);
2135 append_pattern_def_seq (stmt_vinfo, def_stmt);
2137 t4 = vect_recog_temp_ssa_var (itype, NULL);
2138 def_stmt
2139 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2141 if (post_shift != 1)
2143 append_pattern_def_seq (stmt_vinfo, def_stmt);
2145 q = vect_recog_temp_ssa_var (itype, NULL);
2146 pattern_stmt
2147 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2148 build_int_cst (itype,
2149 post_shift
2150 - 1));
2152 else
2154 q = t4;
2155 pattern_stmt = def_stmt;
2158 else
2160 if (pre_shift >= prec || post_shift >= prec)
2161 return NULL;
2163 /* t1 = oprnd0 >> pre_shift;
2164 t2 = t1 h* ml;
2165 q = t2 >> post_shift; */
2166 if (pre_shift)
2168 t1 = vect_recog_temp_ssa_var (itype, NULL);
2169 def_stmt
2170 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2171 build_int_cst (NULL,
2172 pre_shift));
2173 append_pattern_def_seq (stmt_vinfo, def_stmt);
2175 else
2176 t1 = oprnd0;
2178 t2 = vect_recog_temp_ssa_var (itype, NULL);
2179 def_stmt
2180 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2181 build_int_cst (itype, ml));
2183 if (post_shift)
2185 append_pattern_def_seq (stmt_vinfo, def_stmt);
2187 q = vect_recog_temp_ssa_var (itype, NULL);
2188 def_stmt
2189 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2190 build_int_cst (itype,
2191 post_shift));
2193 else
2194 q = t2;
2196 pattern_stmt = def_stmt;
2199 else
2201 unsigned HOST_WIDE_INT ml;
2202 int post_shift;
2203 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2204 unsigned HOST_WIDE_INT abs_d;
2205 bool add = false;
2206 tree t1, t2, t3, t4;
2208 /* Give up for -1. */
2209 if (d == -1)
2210 return NULL;
2212 /* Since d might be INT_MIN, we have to cast to
2213 unsigned HOST_WIDE_INT before negating to avoid
2214 undefined signed overflow. */
2215 abs_d = (d >= 0
2216 ? (unsigned HOST_WIDE_INT) d
2217 : - (unsigned HOST_WIDE_INT) d);
2219 /* n rem d = n rem -d */
2220 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2222 d = abs_d;
2223 oprnd1 = build_int_cst (itype, abs_d);
2225 else if (HOST_BITS_PER_WIDE_INT >= prec
2226 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2227 /* This case is not handled correctly below. */
2228 return NULL;
2230 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2231 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2233 add = true;
2234 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2236 if (post_shift >= prec)
2237 return NULL;
2239 /* t1 = oprnd0 h* ml; */
2240 t1 = vect_recog_temp_ssa_var (itype, NULL);
2241 def_stmt
2242 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2243 build_int_cst (itype, ml));
2245 if (add)
2247 /* t2 = t1 + oprnd0; */
2248 append_pattern_def_seq (stmt_vinfo, def_stmt);
2249 t2 = vect_recog_temp_ssa_var (itype, NULL);
2250 def_stmt
2251 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2253 else
2254 t2 = t1;
2256 if (post_shift)
2258 /* t3 = t2 >> post_shift; */
2259 append_pattern_def_seq (stmt_vinfo, def_stmt);
2260 t3 = vect_recog_temp_ssa_var (itype, NULL);
2261 def_stmt
2262 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2263 build_int_cst (itype, post_shift));
2265 else
2266 t3 = t2;
2268 double_int oprnd0_min, oprnd0_max;
2269 int msb = 1;
2270 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2272 if (!oprnd0_min.is_negative ())
2273 msb = 0;
2274 else if (oprnd0_max.is_negative ())
2275 msb = -1;
2278 if (msb == 0 && d >= 0)
2280 /* q = t3; */
2281 q = t3;
2282 pattern_stmt = def_stmt;
2284 else
2286 /* t4 = oprnd0 >> (prec - 1);
2287 or if we know from VRP that oprnd0 >= 0
2288 t4 = 0;
2289 or if we know from VRP that oprnd0 < 0
2290 t4 = -1; */
2291 append_pattern_def_seq (stmt_vinfo, def_stmt);
2292 t4 = vect_recog_temp_ssa_var (itype, NULL);
2293 if (msb != 1)
2294 def_stmt
2295 = gimple_build_assign_with_ops (INTEGER_CST,
2296 t4, build_int_cst (itype, msb),
2297 NULL_TREE);
2298 else
2299 def_stmt
2300 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2301 build_int_cst (itype, prec - 1));
2302 append_pattern_def_seq (stmt_vinfo, def_stmt);
2304 /* q = t3 - t4; or q = t4 - t3; */
2305 q = vect_recog_temp_ssa_var (itype, NULL);
2306 pattern_stmt
2307 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2308 d < 0 ? t3 : t4);
2312 if (rhs_code == TRUNC_MOD_EXPR)
2314 tree r, t1;
2316 /* We divided. Now finish by:
2317 t1 = q * oprnd1;
2318 r = oprnd0 - t1; */
2319 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2321 t1 = vect_recog_temp_ssa_var (itype, NULL);
2322 def_stmt
2323 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2324 append_pattern_def_seq (stmt_vinfo, def_stmt);
2326 r = vect_recog_temp_ssa_var (itype, NULL);
2327 pattern_stmt
2328 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2331 /* Pattern detected. */
2332 if (dump_enabled_p ())
2334 dump_printf_loc (MSG_NOTE, vect_location,
2335 "vect_recog_divmod_pattern: detected: ");
2336 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2337 dump_printf (MSG_NOTE, "\n");
2340 stmts->safe_push (last_stmt);
2342 *type_in = vectype;
2343 *type_out = vectype;
2344 return pattern_stmt;
2347 /* Function vect_recog_mixed_size_cond_pattern
2349 Try to find the following pattern:
2351 type x_t, y_t;
2352 TYPE a_T, b_T, c_T;
2353 loop:
2354 S1 a_T = x_t CMP y_t ? b_T : c_T;
2356 where type 'TYPE' is an integral type which has different size
2357 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2358 than 'type', the constants need to fit into an integer type
2359 with the same width as 'type') or results of conversion from 'type'.
2361 Input:
2363 * LAST_STMT: A stmt from which the pattern search begins.
2365 Output:
2367 * TYPE_IN: The type of the input arguments to the pattern.
2369 * TYPE_OUT: The type of the output of this pattern.
2371 * Return value: A new stmt that will be used to replace the pattern.
2372 Additionally a def_stmt is added.
2374 a_it = x_t CMP y_t ? b_it : c_it;
2375 a_T = (TYPE) a_it; */
2377 static gimple
2378 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2379 tree *type_out)
2381 gimple last_stmt = (*stmts)[0];
2382 tree cond_expr, then_clause, else_clause;
2383 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2384 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2385 enum machine_mode cmpmode;
2386 gimple pattern_stmt, def_stmt;
2387 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2388 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2389 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2390 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2391 bool promotion;
2392 tree comp_scalar_type;
2394 if (!is_gimple_assign (last_stmt)
2395 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2396 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2397 return NULL;
2399 cond_expr = gimple_assign_rhs1 (last_stmt);
2400 then_clause = gimple_assign_rhs2 (last_stmt);
2401 else_clause = gimple_assign_rhs3 (last_stmt);
2403 if (!COMPARISON_CLASS_P (cond_expr))
2404 return NULL;
2406 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2407 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2408 if (comp_vectype == NULL_TREE)
2409 return NULL;
2411 type = gimple_expr_type (last_stmt);
2412 if (types_compatible_p (type, comp_scalar_type)
2413 || ((TREE_CODE (then_clause) != INTEGER_CST
2414 || TREE_CODE (else_clause) != INTEGER_CST)
2415 && !INTEGRAL_TYPE_P (comp_scalar_type))
2416 || !INTEGRAL_TYPE_P (type))
2417 return NULL;
2419 if ((TREE_CODE (then_clause) != INTEGER_CST
2420 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2421 &def_stmt0, &promotion))
2422 || (TREE_CODE (else_clause) != INTEGER_CST
2423 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2424 &def_stmt1, &promotion)))
2425 return NULL;
2427 if (orig_type0 && orig_type1
2428 && !types_compatible_p (orig_type0, orig_type1))
2429 return NULL;
2431 if (orig_type0)
2433 if (!types_compatible_p (orig_type0, comp_scalar_type))
2434 return NULL;
2435 then_clause = gimple_assign_rhs1 (def_stmt0);
2436 itype = orig_type0;
2439 if (orig_type1)
2441 if (!types_compatible_p (orig_type1, comp_scalar_type))
2442 return NULL;
2443 else_clause = gimple_assign_rhs1 (def_stmt1);
2444 itype = orig_type1;
2447 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2449 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2450 return NULL;
2452 vectype = get_vectype_for_scalar_type (type);
2453 if (vectype == NULL_TREE)
2454 return NULL;
2456 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2457 return NULL;
2459 if (itype == NULL_TREE)
2460 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2461 TYPE_UNSIGNED (type));
2463 if (itype == NULL_TREE
2464 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2465 return NULL;
2467 vecitype = get_vectype_for_scalar_type (itype);
2468 if (vecitype == NULL_TREE)
2469 return NULL;
2471 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2472 return NULL;
2474 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2476 if ((TREE_CODE (then_clause) == INTEGER_CST
2477 && !int_fits_type_p (then_clause, itype))
2478 || (TREE_CODE (else_clause) == INTEGER_CST
2479 && !int_fits_type_p (else_clause, itype)))
2480 return NULL;
2483 def_stmt
2484 = gimple_build_assign_with_ops (COND_EXPR,
2485 vect_recog_temp_ssa_var (itype, NULL),
2486 unshare_expr (cond_expr),
2487 fold_convert (itype, then_clause),
2488 fold_convert (itype, else_clause));
2489 pattern_stmt
2490 = gimple_build_assign_with_ops (NOP_EXPR,
2491 vect_recog_temp_ssa_var (type, NULL),
2492 gimple_assign_lhs (def_stmt), NULL_TREE);
2494 new_pattern_def_seq (stmt_vinfo, def_stmt);
2495 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2496 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2497 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2498 *type_in = vecitype;
2499 *type_out = vectype;
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_NOTE, vect_location,
2503 "vect_recog_mixed_size_cond_pattern: detected:\n");
2505 return pattern_stmt;
2509 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2510 true if bool VAR can be optimized that way. */
2512 static bool
2513 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2515 gimple def_stmt;
2516 enum vect_def_type dt;
2517 tree def, rhs1;
2518 enum tree_code rhs_code;
2520 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2521 &dt))
2522 return false;
2524 if (dt != vect_internal_def)
2525 return false;
2527 if (!is_gimple_assign (def_stmt))
2528 return false;
2530 if (!has_single_use (def))
2531 return false;
2533 rhs1 = gimple_assign_rhs1 (def_stmt);
2534 rhs_code = gimple_assign_rhs_code (def_stmt);
2535 switch (rhs_code)
2537 case SSA_NAME:
2538 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2540 CASE_CONVERT:
2541 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2542 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2543 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2544 return false;
2545 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2547 case BIT_NOT_EXPR:
2548 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2550 case BIT_AND_EXPR:
2551 case BIT_IOR_EXPR:
2552 case BIT_XOR_EXPR:
2553 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2554 return false;
2555 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2556 bb_vinfo);
2558 default:
2559 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2561 tree vecitype, comp_vectype;
2563 /* If the comparison can throw, then is_gimple_condexpr will be
2564 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2565 if (stmt_could_throw_p (def_stmt))
2566 return false;
2568 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2569 if (comp_vectype == NULL_TREE)
2570 return false;
2572 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2574 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2575 tree itype
2576 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2577 vecitype = get_vectype_for_scalar_type (itype);
2578 if (vecitype == NULL_TREE)
2579 return false;
2581 else
2582 vecitype = comp_vectype;
2583 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2585 return false;
2590 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2591 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2592 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2594 static tree
2595 adjust_bool_pattern_cast (tree type, tree var)
2597 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2598 gimple cast_stmt, pattern_stmt;
2600 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2601 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2602 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2603 cast_stmt
2604 = gimple_build_assign_with_ops (NOP_EXPR,
2605 vect_recog_temp_ssa_var (type, NULL),
2606 gimple_assign_lhs (pattern_stmt),
2607 NULL_TREE);
2608 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2609 return gimple_assign_lhs (cast_stmt);
2613 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2614 recursively. VAR is an SSA_NAME that should be transformed from bool
2615 to a wider integer type, OUT_TYPE is the desired final integer type of
2616 the whole pattern, TRUEVAL should be NULL unless optimizing
2617 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2618 in the then_clause, STMTS is where statements with added pattern stmts
2619 should be pushed to. */
2621 static tree
2622 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2623 vec<gimple> *stmts)
2625 gimple stmt = SSA_NAME_DEF_STMT (var);
2626 enum tree_code rhs_code, def_rhs_code;
2627 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2628 location_t loc;
2629 gimple pattern_stmt, def_stmt;
2631 rhs1 = gimple_assign_rhs1 (stmt);
2632 rhs2 = gimple_assign_rhs2 (stmt);
2633 rhs_code = gimple_assign_rhs_code (stmt);
2634 loc = gimple_location (stmt);
2635 switch (rhs_code)
2637 case SSA_NAME:
2638 CASE_CONVERT:
2639 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2640 itype = TREE_TYPE (irhs1);
2641 pattern_stmt
2642 = gimple_build_assign_with_ops (SSA_NAME,
2643 vect_recog_temp_ssa_var (itype, NULL),
2644 irhs1, NULL_TREE);
2645 break;
2647 case BIT_NOT_EXPR:
2648 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2649 itype = TREE_TYPE (irhs1);
2650 pattern_stmt
2651 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2652 vect_recog_temp_ssa_var (itype, NULL),
2653 irhs1, build_int_cst (itype, 1));
2654 break;
2656 case BIT_AND_EXPR:
2657 /* Try to optimize x = y & (a < b ? 1 : 0); into
2658 x = (a < b ? y : 0);
2660 E.g. for:
2661 bool a_b, b_b, c_b;
2662 TYPE d_T;
2664 S1 a_b = x1 CMP1 y1;
2665 S2 b_b = x2 CMP2 y2;
2666 S3 c_b = a_b & b_b;
2667 S4 d_T = (TYPE) c_b;
2669 we would normally emit:
2671 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2672 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2673 S3' c_T = a_T & b_T;
2674 S4' d_T = c_T;
2676 but we can save one stmt by using the
2677 result of one of the COND_EXPRs in the other COND_EXPR and leave
2678 BIT_AND_EXPR stmt out:
2680 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2681 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2682 S4' f_T = c_T;
2684 At least when VEC_COND_EXPR is implemented using masks
2685 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2686 computes the comparison masks and ands it, in one case with
2687 all ones vector, in the other case with a vector register.
2688 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2689 often more expensive. */
2690 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2691 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2692 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2694 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2695 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2696 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2697 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2699 gimple tstmt;
2700 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2701 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2702 tstmt = stmts->pop ();
2703 gcc_assert (tstmt == def_stmt);
2704 stmts->quick_push (stmt);
2705 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2706 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2707 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2708 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2709 return irhs2;
2711 else
2712 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2713 goto and_ior_xor;
2715 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2716 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2717 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2719 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2720 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2721 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2722 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2724 gimple tstmt;
2725 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2726 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2727 tstmt = stmts->pop ();
2728 gcc_assert (tstmt == def_stmt);
2729 stmts->quick_push (stmt);
2730 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2731 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2732 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2733 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2734 return irhs1;
2736 else
2737 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2738 goto and_ior_xor;
2740 /* FALLTHRU */
2741 case BIT_IOR_EXPR:
2742 case BIT_XOR_EXPR:
2743 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2744 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2745 and_ior_xor:
2746 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2747 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2749 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2750 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2751 int out_prec = TYPE_PRECISION (out_type);
2752 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2753 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2754 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2755 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2756 else
2758 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2759 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2762 itype = TREE_TYPE (irhs1);
2763 pattern_stmt
2764 = gimple_build_assign_with_ops (rhs_code,
2765 vect_recog_temp_ssa_var (itype, NULL),
2766 irhs1, irhs2);
2767 break;
2769 default:
2770 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2771 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2772 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2773 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2774 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2776 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2777 itype
2778 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2780 else
2781 itype = TREE_TYPE (rhs1);
2782 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2783 if (trueval == NULL_TREE)
2784 trueval = build_int_cst (itype, 1);
2785 else
2786 gcc_checking_assert (useless_type_conversion_p (itype,
2787 TREE_TYPE (trueval)));
2788 pattern_stmt
2789 = gimple_build_assign_with_ops (COND_EXPR,
2790 vect_recog_temp_ssa_var (itype, NULL),
2791 cond_expr, trueval,
2792 build_int_cst (itype, 0));
2793 break;
2796 stmts->safe_push (stmt);
2797 gimple_set_location (pattern_stmt, loc);
2798 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2799 return gimple_assign_lhs (pattern_stmt);
2803 /* Function vect_recog_bool_pattern
2805 Try to find pattern like following:
2807 bool a_b, b_b, c_b, d_b, e_b;
2808 TYPE f_T;
2809 loop:
2810 S1 a_b = x1 CMP1 y1;
2811 S2 b_b = x2 CMP2 y2;
2812 S3 c_b = a_b & b_b;
2813 S4 d_b = x3 CMP3 y3;
2814 S5 e_b = c_b | d_b;
2815 S6 f_T = (TYPE) e_b;
2817 where type 'TYPE' is an integral type.
2819 Input:
2821 * LAST_STMT: A stmt at the end from which the pattern
2822 search begins, i.e. cast of a bool to
2823 an integer type.
2825 Output:
2827 * TYPE_IN: The type of the input arguments to the pattern.
2829 * TYPE_OUT: The type of the output of this pattern.
2831 * Return value: A new stmt that will be used to replace the pattern.
2833 Assuming size of TYPE is the same as size of all comparisons
2834 (otherwise some casts would be added where needed), the above
2835 sequence we create related pattern stmts:
2836 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2837 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2838 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2839 S5' e_T = c_T | d_T;
2840 S6' f_T = e_T;
2842 Instead of the above S3' we could emit:
2843 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2844 S3' c_T = a_T | b_T;
2845 but the above is more efficient. */
2847 static gimple
2848 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2849 tree *type_out)
2851 gimple last_stmt = stmts->pop ();
2852 enum tree_code rhs_code;
2853 tree var, lhs, rhs, vectype;
2854 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2855 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2856 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2857 gimple pattern_stmt;
2859 if (!is_gimple_assign (last_stmt))
2860 return NULL;
2862 var = gimple_assign_rhs1 (last_stmt);
2863 lhs = gimple_assign_lhs (last_stmt);
2865 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2866 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2867 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2868 return NULL;
2870 rhs_code = gimple_assign_rhs_code (last_stmt);
2871 if (CONVERT_EXPR_CODE_P (rhs_code))
2873 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2874 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2875 return NULL;
2876 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2877 if (vectype == NULL_TREE)
2878 return NULL;
2880 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2881 return NULL;
2883 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2884 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2885 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2886 pattern_stmt
2887 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2888 else
2889 pattern_stmt
2890 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2891 *type_out = vectype;
2892 *type_in = vectype;
2893 stmts->safe_push (last_stmt);
2894 if (dump_enabled_p ())
2895 dump_printf_loc (MSG_NOTE, vect_location,
2896 "vect_recog_bool_pattern: detected:\n");
2898 return pattern_stmt;
2900 else if (rhs_code == SSA_NAME
2901 && STMT_VINFO_DATA_REF (stmt_vinfo))
2903 stmt_vec_info pattern_stmt_info;
2904 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2905 gcc_assert (vectype != NULL_TREE);
2906 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2907 return NULL;
2908 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2909 return NULL;
2911 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2912 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2913 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2915 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2916 gimple cast_stmt
2917 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2918 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2919 rhs = rhs2;
2921 pattern_stmt
2922 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2923 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2924 bb_vinfo);
2925 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2926 STMT_VINFO_DATA_REF (pattern_stmt_info)
2927 = STMT_VINFO_DATA_REF (stmt_vinfo);
2928 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2929 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2930 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2931 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2932 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2933 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2934 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2935 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2936 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2937 *type_out = vectype;
2938 *type_in = vectype;
2939 stmts->safe_push (last_stmt);
2940 if (dump_enabled_p ())
2941 dump_printf_loc (MSG_NOTE, vect_location,
2942 "vect_recog_bool_pattern: detected:\n");
2943 return pattern_stmt;
2945 else
2946 return NULL;
2950 /* Mark statements that are involved in a pattern. */
2952 static inline void
2953 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2954 tree pattern_vectype)
2956 stmt_vec_info pattern_stmt_info, def_stmt_info;
2957 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2958 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2959 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2960 gimple def_stmt;
2962 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2963 if (pattern_stmt_info == NULL)
2965 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2966 bb_vinfo);
2967 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2969 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2971 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2972 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2973 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2974 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2975 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2976 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2977 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2978 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2979 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2981 gimple_stmt_iterator si;
2982 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2983 !gsi_end_p (si); gsi_next (&si))
2985 def_stmt = gsi_stmt (si);
2986 def_stmt_info = vinfo_for_stmt (def_stmt);
2987 if (def_stmt_info == NULL)
2989 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2990 bb_vinfo);
2991 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2993 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2994 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2995 STMT_VINFO_DEF_TYPE (def_stmt_info)
2996 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2997 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2998 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3003 /* Function vect_pattern_recog_1
3005 Input:
3006 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3007 computation pattern.
3008 STMT: A stmt from which the pattern search should start.
3010 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3011 expression that computes the same functionality and can be used to
3012 replace the sequence of stmts that are involved in the pattern.
3014 Output:
3015 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3016 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3017 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3018 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3019 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3020 to the available target pattern.
3022 This function also does some bookkeeping, as explained in the documentation
3023 for vect_recog_pattern. */
3025 static void
3026 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3027 gimple_stmt_iterator si,
3028 vec<gimple> *stmts_to_replace)
3030 gimple stmt = gsi_stmt (si), pattern_stmt;
3031 stmt_vec_info stmt_info;
3032 loop_vec_info loop_vinfo;
3033 tree pattern_vectype;
3034 tree type_in, type_out;
3035 enum tree_code code;
3036 int i;
3037 gimple next;
3039 stmts_to_replace->truncate (0);
3040 stmts_to_replace->quick_push (stmt);
3041 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3042 if (!pattern_stmt)
3043 return;
3045 stmt = stmts_to_replace->last ();
3046 stmt_info = vinfo_for_stmt (stmt);
3047 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3049 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3051 /* No need to check target support (already checked by the pattern
3052 recognition function). */
3053 pattern_vectype = type_out ? type_out : type_in;
3055 else
3057 enum machine_mode vec_mode;
3058 enum insn_code icode;
3059 optab optab;
3061 /* Check target support */
3062 type_in = get_vectype_for_scalar_type (type_in);
3063 if (!type_in)
3064 return;
3065 if (type_out)
3066 type_out = get_vectype_for_scalar_type (type_out);
3067 else
3068 type_out = type_in;
3069 if (!type_out)
3070 return;
3071 pattern_vectype = type_out;
3073 if (is_gimple_assign (pattern_stmt))
3074 code = gimple_assign_rhs_code (pattern_stmt);
3075 else
3077 gcc_assert (is_gimple_call (pattern_stmt));
3078 code = CALL_EXPR;
3081 optab = optab_for_tree_code (code, type_in, optab_default);
3082 vec_mode = TYPE_MODE (type_in);
3083 if (!optab
3084 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3085 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3086 return;
3089 /* Found a vectorizable pattern. */
3090 if (dump_enabled_p ())
3092 dump_printf_loc (MSG_NOTE, vect_location,
3093 "pattern recognized: ");
3094 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3095 dump_printf (MSG_NOTE, "\n");
3098 /* Mark the stmts that are involved in the pattern. */
3099 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3101 /* Patterns cannot be vectorized using SLP, because they change the order of
3102 computation. */
3103 if (loop_vinfo)
3104 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3105 if (next == stmt)
3106 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3108 /* It is possible that additional pattern stmts are created and inserted in
3109 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3110 relevant statements. */
3111 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3112 && (unsigned) i < (stmts_to_replace->length () - 1);
3113 i++)
3115 stmt_info = vinfo_for_stmt (stmt);
3116 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3117 if (dump_enabled_p ())
3119 dump_printf_loc (MSG_NOTE, vect_location,
3120 "additional pattern stmt: ");
3121 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3122 dump_printf (MSG_NOTE, "\n");
3125 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3130 /* Function vect_pattern_recog
3132 Input:
3133 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3134 computation idioms.
3136 Output - for each computation idiom that is detected we create a new stmt
3137 that provides the same functionality and that can be vectorized. We
3138 also record some information in the struct_stmt_info of the relevant
3139 stmts, as explained below:
3141 At the entry to this function we have the following stmts, with the
3142 following initial value in the STMT_VINFO fields:
3144 stmt in_pattern_p related_stmt vec_stmt
3145 S1: a_i = .... - - -
3146 S2: a_2 = ..use(a_i).. - - -
3147 S3: a_1 = ..use(a_2).. - - -
3148 S4: a_0 = ..use(a_1).. - - -
3149 S5: ... = ..use(a_0).. - - -
3151 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3152 represented by a single stmt. We then:
3153 - create a new stmt S6 equivalent to the pattern (the stmt is not
3154 inserted into the code)
3155 - fill in the STMT_VINFO fields as follows:
3157 in_pattern_p related_stmt vec_stmt
3158 S1: a_i = .... - - -
3159 S2: a_2 = ..use(a_i).. - - -
3160 S3: a_1 = ..use(a_2).. - - -
3161 S4: a_0 = ..use(a_1).. true S6 -
3162 '---> S6: a_new = .... - S4 -
3163 S5: ... = ..use(a_0).. - - -
3165 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3166 to each other through the RELATED_STMT field).
3168 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3169 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3170 remain irrelevant unless used by stmts other than S4.
3172 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3173 (because they are marked as irrelevant). It will vectorize S6, and record
3174 a pointer to the new vector stmt VS6 from S6 (as usual).
3175 S4 will be skipped, and S5 will be vectorized as usual:
3177 in_pattern_p related_stmt vec_stmt
3178 S1: a_i = .... - - -
3179 S2: a_2 = ..use(a_i).. - - -
3180 S3: a_1 = ..use(a_2).. - - -
3181 > VS6: va_new = .... - - -
3182 S4: a_0 = ..use(a_1).. true S6 VS6
3183 '---> S6: a_new = .... - S4 VS6
3184 > VS5: ... = ..vuse(va_new).. - - -
3185 S5: ... = ..use(a_0).. - - -
3187 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3188 elsewhere), and we'll end up with:
3190 VS6: va_new = ....
3191 VS5: ... = ..vuse(va_new)..
3193 In case of more than one pattern statements, e.g., widen-mult with
3194 intermediate type:
3196 S1 a_t = ;
3197 S2 a_T = (TYPE) a_t;
3198 '--> S3: a_it = (interm_type) a_t;
3199 S4 prod_T = a_T * CONST;
3200 '--> S5: prod_T' = a_it w* CONST;
3202 there may be other users of a_T outside the pattern. In that case S2 will
3203 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3204 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3205 be recorded in S3. */
3207 void
3208 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3210 struct loop *loop;
3211 basic_block *bbs;
3212 unsigned int nbbs;
3213 gimple_stmt_iterator si;
3214 unsigned int i, j;
3215 vect_recog_func_ptr vect_recog_func;
3216 auto_vec<gimple, 1> stmts_to_replace;
3217 gimple stmt;
3219 if (dump_enabled_p ())
3220 dump_printf_loc (MSG_NOTE, vect_location,
3221 "=== vect_pattern_recog ===\n");
3223 if (loop_vinfo)
3225 loop = LOOP_VINFO_LOOP (loop_vinfo);
3226 bbs = LOOP_VINFO_BBS (loop_vinfo);
3227 nbbs = loop->num_nodes;
3229 else
3231 bbs = &BB_VINFO_BB (bb_vinfo);
3232 nbbs = 1;
3235 /* Scan through the loop stmts, applying the pattern recognition
3236 functions starting at each stmt visited: */
3237 for (i = 0; i < nbbs; i++)
3239 basic_block bb = bbs[i];
3240 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3242 if (bb_vinfo && (stmt = gsi_stmt (si))
3243 && vinfo_for_stmt (stmt)
3244 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3245 continue;
3247 /* Scan over all generic vect_recog_xxx_pattern functions. */
3248 for (j = 0; j < NUM_PATTERNS; j++)
3250 vect_recog_func = vect_vect_recog_func_ptrs[j];
3251 vect_pattern_recog_1 (vect_recog_func, si,
3252 &stmts_to_replace);