2013-10-11 Marc Glisse <marc.glisse@inria.fr>
[official-gcc.git] / gcc / tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-ssa.h"
31 #include "cfgloop.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "params.h"
35 #include "tree-data-ref.h"
36 #include "tree-vectorizer.h"
37 #include "recog.h" /* FIXME: for insn_data */
38 #include "diagnostic-core.h"
39 #include "dumpfile.h"
41 /* Pattern recognition functions */
42 static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
43 tree *);
44 static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
45 tree *);
46 static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
47 tree *);
48 static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
49 static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
50 tree *);
51 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
52 tree *, tree *);
53 static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
54 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
55 tree *, tree *);
56 static gimple vect_recog_divmod_pattern (vec<gimple> *,
57 tree *, tree *);
58 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
59 tree *, tree *);
60 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
61 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
62 vect_recog_widen_mult_pattern,
63 vect_recog_widen_sum_pattern,
64 vect_recog_dot_prod_pattern,
65 vect_recog_pow_pattern,
66 vect_recog_widen_shift_pattern,
67 vect_recog_over_widening_pattern,
68 vect_recog_rotate_pattern,
69 vect_recog_vector_vector_shift_pattern,
70 vect_recog_divmod_pattern,
71 vect_recog_mixed_size_cond_pattern,
72 vect_recog_bool_pattern};
74 static inline void
75 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
77 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
78 stmt);
81 static inline void
82 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
84 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
85 append_pattern_def_seq (stmt_info, stmt);
88 /* Check whether STMT2 is in the same loop or basic block as STMT1.
89 Which of the two applies depends on whether we're currently doing
90 loop-based or basic-block-based vectorization, as determined by
91 the vinfo_for_stmt for STMT1 (which must be defined).
93 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
94 to be defined as well. */
96 static bool
97 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
99 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
100 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
101 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
103 if (!gimple_bb (stmt2))
104 return false;
106 if (loop_vinfo)
108 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
109 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
110 return false;
112 else
114 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
115 || gimple_code (stmt2) == GIMPLE_PHI)
116 return false;
119 gcc_assert (vinfo_for_stmt (stmt2));
120 return true;
123 /* If the LHS of DEF_STMT has a single use, and that statement is
124 in the same loop or basic block, return it. */
126 static gimple
127 vect_single_imm_use (gimple def_stmt)
129 tree lhs = gimple_assign_lhs (def_stmt);
130 use_operand_p use_p;
131 gimple use_stmt;
133 if (!single_imm_use (lhs, &use_p, &use_stmt))
134 return NULL;
136 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
137 return NULL;
139 return use_stmt;
142 /* Check whether NAME, an ssa-name used in USE_STMT,
143 is a result of a type promotion or demotion, such that:
144 DEF_STMT: NAME = NOP (name0)
145 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
146 If CHECK_SIGN is TRUE, check that either both types are signed or both are
147 unsigned. */
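/* Illustrative sketch, not part of the original source: given

     short s_t;
     int i_T;
     DEF_STMT: i_T = (int) s_t;
     USE_STMT: res = i_T + ...;

   a call type_conversion_p (i_T, USE_STMT, true, &orig_type, &def_stmt,
   &promotion) would return true with ORIG_TYPE == short, DEF_STMT as
   above and PROMOTION == true, assuming int is at least twice as wide
   as short on the target. */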
149 static bool
150 type_conversion_p (tree name, gimple use_stmt, bool check_sign,
151 tree *orig_type, gimple *def_stmt, bool *promotion)
153 tree dummy;
154 gimple dummy_gimple;
155 loop_vec_info loop_vinfo;
156 stmt_vec_info stmt_vinfo;
157 tree type = TREE_TYPE (name);
158 tree oprnd0;
159 enum vect_def_type dt;
160 tree def;
161 bb_vec_info bb_vinfo;
163 stmt_vinfo = vinfo_for_stmt (use_stmt);
164 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
165 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
166 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
167 &def, &dt))
168 return false;
170 if (dt != vect_internal_def
171 && dt != vect_external_def && dt != vect_constant_def)
172 return false;
174 if (!*def_stmt)
175 return false;
177 if (!is_gimple_assign (*def_stmt))
178 return false;
180 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
181 return false;
183 oprnd0 = gimple_assign_rhs1 (*def_stmt);
185 *orig_type = TREE_TYPE (oprnd0);
186 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
187 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
188 return false;
190 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
191 *promotion = true;
192 else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
193 *promotion = false;
194 else
195 return false;
197 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
198 bb_vinfo, &dummy_gimple, &dummy, &dt))
199 return false;
201 return true;
204 /* Helper to return a new temporary of type TYPE for use in pattern stmt STMT.
205 If STMT is NULL, the caller must set SSA_NAME_DEF_STMT for the returned var. */
207 static tree
208 vect_recog_temp_ssa_var (tree type, gimple stmt)
210 return make_temp_ssa_name (type, stmt, "patt");
213 /* Function vect_recog_dot_prod_pattern
215 Try to find the following pattern:
217 type x_t, y_t;
218 TYPE1 prod;
219 TYPE2 sum = init;
220 loop:
221 sum_0 = phi <init, sum_1>
222 S1 x_t = ...
223 S2 y_t = ...
224 S3 x_T = (TYPE1) x_t;
225 S4 y_T = (TYPE1) y_t;
226 S5 prod = x_T * y_T;
227 [S6 prod = (TYPE2) prod; #optional]
228 S7 sum_1 = prod + sum_0;
230 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
231 same size as 'TYPE1' or bigger. This is a special case of a reduction
232 computation.
234 Input:
236 * STMTS: Contains a stmt from which the pattern search begins. In the
237 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
238 will be detected.
240 Output:
242 * TYPE_IN: The type of the input arguments to the pattern.
244 * TYPE_OUT: The type of the output of this pattern.
246 * Return value: A new stmt that will be used to replace the sequence of
247 stmts that constitute the pattern. In this case it will be:
248 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
250 Note: The dot-prod idiom is a widening reduction pattern that is
251 vectorized without preserving all the intermediate results. It
252 produces only N/2 (widened) results (by summing up pairs of
253 intermediate results) rather than all N results. Therefore, we
254 cannot allow this pattern when we want to get all the results and in
255 the correct order (as is the case when this computation is in an
256 inner-loop nested in an outer-loop that is being vectorized). */
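/* Illustrative example, not from the original source: a C loop of the
   shape this function recognizes, assuming a target that supports
   DOT_PROD_EXPR on vectors of shorts (the implicit C promotions of
   x[i] and y[i] to int supply S3/S4):

     static int
     example_sdot (short *x, short *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x[i] * y[i];
       return sum;
     }
*/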
258 static gimple
259 vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
260 tree *type_out)
262 gimple stmt, last_stmt = (*stmts)[0];
263 tree oprnd0, oprnd1;
264 tree oprnd00, oprnd01;
265 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
266 tree type, half_type;
267 gimple pattern_stmt;
268 tree prod_type;
269 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
270 struct loop *loop;
271 tree var;
272 bool promotion;
274 if (!loop_info)
275 return NULL;
277 loop = LOOP_VINFO_LOOP (loop_info);
279 if (!is_gimple_assign (last_stmt))
280 return NULL;
282 type = gimple_expr_type (last_stmt);
284 /* Look for the following pattern
285 DX = (TYPE1) X;
286 DY = (TYPE1) Y;
287 DPROD = DX * DY;
288 DDPROD = (TYPE2) DPROD;
289 sum_1 = DDPROD + sum_0;
290 In which
291 - DX is double the size of X
292 - DY is double the size of Y
293 - DX, DY, DPROD all have the same type
294 - sum is the same size as DPROD or bigger
295 - sum has been recognized as a reduction variable.
297 This is equivalent to:
298 DPROD = X w* Y; #widen mult
299 sum_1 = DPROD w+ sum_0; #widen summation
300 or
301 DPROD = X w* Y; #widen mult
302 sum_1 = DPROD + sum_0; #summation */
305 /* Starting from LAST_STMT, follow the defs of its uses in search
306 of the above pattern. */
308 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
309 return NULL;
311 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
313 /* Has been detected as widening-summation? */
315 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
316 type = gimple_expr_type (stmt);
317 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
318 return NULL;
319 oprnd0 = gimple_assign_rhs1 (stmt);
320 oprnd1 = gimple_assign_rhs2 (stmt);
321 half_type = TREE_TYPE (oprnd0);
323 else
325 gimple def_stmt;
327 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
328 return NULL;
329 oprnd0 = gimple_assign_rhs1 (last_stmt);
330 oprnd1 = gimple_assign_rhs2 (last_stmt);
331 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
332 || !types_compatible_p (TREE_TYPE (oprnd1), type))
333 return NULL;
334 stmt = last_stmt;
336 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
337 &promotion)
338 && promotion)
340 stmt = def_stmt;
341 oprnd0 = gimple_assign_rhs1 (stmt);
343 else
344 half_type = type;
347 /* So far so good. Since last_stmt was detected as a (summation) reduction,
348 we know that oprnd1 is the reduction variable (defined by a loop-header
349 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
350 Left to check that oprnd0 is defined by a (widen_)mult_expr */
351 if (TREE_CODE (oprnd0) != SSA_NAME)
352 return NULL;
354 prod_type = half_type;
355 stmt = SSA_NAME_DEF_STMT (oprnd0);
357 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
358 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
359 return NULL;
361 /* FORNOW. We could continue analyzing the def-use chain when this stmt is a
362 phi inside the loop (in case we are analyzing an outer-loop). */
363 if (!is_gimple_assign (stmt))
364 return NULL;
365 stmt_vinfo = vinfo_for_stmt (stmt);
366 gcc_assert (stmt_vinfo);
367 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
368 return NULL;
369 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
370 return NULL;
371 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
373 /* Has been detected as a widening multiplication? */
375 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
376 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
377 return NULL;
378 stmt_vinfo = vinfo_for_stmt (stmt);
379 gcc_assert (stmt_vinfo);
380 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
381 oprnd00 = gimple_assign_rhs1 (stmt);
382 oprnd01 = gimple_assign_rhs2 (stmt);
384 else
386 tree half_type0, half_type1;
387 gimple def_stmt;
388 tree oprnd0, oprnd1;
390 oprnd0 = gimple_assign_rhs1 (stmt);
391 oprnd1 = gimple_assign_rhs2 (stmt);
392 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
393 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
394 return NULL;
395 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
396 &promotion)
397 || !promotion)
398 return NULL;
399 oprnd00 = gimple_assign_rhs1 (def_stmt);
400 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
401 &promotion)
402 || !promotion)
403 return NULL;
404 oprnd01 = gimple_assign_rhs1 (def_stmt);
405 if (!types_compatible_p (half_type0, half_type1))
406 return NULL;
407 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
408 return NULL;
411 half_type = TREE_TYPE (oprnd00);
412 *type_in = half_type;
413 *type_out = type;
415 /* Pattern detected. Create a stmt to be used to replace the pattern: */
416 var = vect_recog_temp_ssa_var (type, NULL);
417 pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
418 oprnd00, oprnd01, oprnd1);
420 if (dump_enabled_p ())
422 dump_printf_loc (MSG_NOTE, vect_location,
423 "vect_recog_dot_prod_pattern: detected: ");
424 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
425 dump_printf (MSG_NOTE, "\n");
428 /* We don't allow changing the order of the computation in the inner-loop
429 when doing outer-loop vectorization. */
430 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
432 return pattern_stmt;
436 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
437 and LSHIFT_EXPR.
439 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
440 we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
442 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
443 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
444 that satisfies the above restrictions, we can perform a widening operation
445 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
446 with a_it = (interm_type) a_t; */
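/* Illustrative example, an assumption rather than original text: with
   HALF_TYPE == char and TYPE == int (4 times wider), a constant such as
   300 does not fit char but fits the intermediate type short, so

     a_T = (int) a_t;        is rewritten as  a_it = (short) a_t;
     prod_T = a_T * 300;     and widened as   prod_T = a_it w* 300;

   performing the widening multiplication from short to int instead. */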
448 static bool
449 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
450 tree const_oprnd, tree *oprnd,
451 vec<gimple> *stmts, tree type,
452 tree *half_type, gimple def_stmt)
454 tree new_type, new_oprnd;
455 gimple new_stmt;
457 if (code != MULT_EXPR && code != LSHIFT_EXPR)
458 return false;
460 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
461 || (code == LSHIFT_EXPR
462 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
463 != 1))
464 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
466 /* CONST_OPRND is a constant of HALF_TYPE. */
467 *oprnd = gimple_assign_rhs1 (def_stmt);
468 return true;
471 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
472 return false;
474 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
475 return false;
477 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
478 a type 2 times bigger than HALF_TYPE. */
479 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
480 TYPE_UNSIGNED (type));
481 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
482 || (code == LSHIFT_EXPR
483 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
484 return false;
486 /* Use NEW_TYPE for widening operation. */
487 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
489 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
490 /* Check if the already created pattern stmt is what we need. */
491 if (!is_gimple_assign (new_stmt)
492 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
493 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
494 return false;
496 stmts->safe_push (def_stmt);
497 *oprnd = gimple_assign_lhs (new_stmt);
499 else
501 /* Create a_T = (NEW_TYPE) a_t; */
502 *oprnd = gimple_assign_rhs1 (def_stmt);
503 new_oprnd = make_ssa_name (new_type, NULL);
504 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
505 NULL_TREE);
506 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
507 stmts->safe_push (def_stmt);
508 *oprnd = new_oprnd;
511 *half_type = new_type;
512 return true;
516 /* Function vect_recog_widen_mult_pattern
518 Try to find the following pattern:
520 type a_t, b_t;
521 TYPE a_T, b_T, prod_T;
523 S1 a_t = ;
524 S2 b_t = ;
525 S3 a_T = (TYPE) a_t;
526 S4 b_T = (TYPE) b_t;
527 S5 prod_T = a_T * b_T;
529 where type 'TYPE' is at least double the size of type 'type'.
531 Also detect unsigned cases:
533 unsigned type a_t, b_t;
534 unsigned TYPE u_prod_T;
535 TYPE a_T, b_T, prod_T;
537 S1 a_t = ;
538 S2 b_t = ;
539 S3 a_T = (TYPE) a_t;
540 S4 b_T = (TYPE) b_t;
541 S5 prod_T = a_T * b_T;
542 S6 u_prod_T = (unsigned TYPE) prod_T;
544 and multiplication by constants:
546 type a_t;
547 TYPE a_T, prod_T;
549 S1 a_t = ;
550 S3 a_T = (TYPE) a_t;
551 S5 prod_T = a_T * CONST;
553 A special case of multiplication by constants is when 'TYPE' is 4 times
554 bigger than 'type', but CONST fits an intermediate type 2 times smaller
555 than 'TYPE'. In that case we create an additional pattern stmt for S3
556 to create a variable of the intermediate type, and perform widen-mult
557 on the intermediate type as well:
559 type a_t;
560 interm_type a_it;
561 TYPE a_T, prod_T, prod_T';
563 S1 a_t = ;
564 S3 a_T = (TYPE) a_t;
565 '--> a_it = (interm_type) a_t;
566 S5 prod_T = a_T * CONST;
567 '--> prod_T' = a_it w* CONST;
569 Input/Output:
571 * STMTS: Contains a stmt from which the pattern search begins. In the
572 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
573 is detected. In case of unsigned widen-mult, the original stmt (S5) is
574 replaced with S6 in STMTS. In case of multiplication by a constant
575 of an intermediate type (the last case above), STMTS also contains S3
576 (inserted before S5).
578 Output:
580 * TYPE_IN: The type of the input arguments to the pattern.
582 * TYPE_OUT: The type of the output of this pattern.
584 * Return value: A new stmt that will be used to replace the sequence of
585 stmts that constitute the pattern. In this case it will be:
586 WIDEN_MULT <a_t, b_t> */
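/* Illustrative example, not from the original source: a loop whose
   multiply this function replaces with WIDEN_MULT_EXPR, assuming the
   target supports a short-to-int widening multiply:

     void
     example_widen_mult (int *restrict c, short *restrict a,
                         short *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         c[i] = a[i] * b[i];
     }
*/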
589 static gimple
590 vect_recog_widen_mult_pattern (vec<gimple> *stmts,
591 tree *type_in, tree *type_out)
593 gimple last_stmt = stmts->pop ();
594 gimple def_stmt0, def_stmt1;
595 tree oprnd0, oprnd1;
596 tree type, half_type0, half_type1;
597 gimple pattern_stmt;
598 tree vectype, vectype_out = NULL_TREE;
599 tree var;
600 enum tree_code dummy_code;
601 int dummy_int;
602 vec<tree> dummy_vec;
603 bool op1_ok;
604 bool promotion;
606 if (!is_gimple_assign (last_stmt))
607 return NULL;
609 type = gimple_expr_type (last_stmt);
611 /* Starting from LAST_STMT, follow the defs of its uses in search
612 of the above pattern. */
614 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
615 return NULL;
617 oprnd0 = gimple_assign_rhs1 (last_stmt);
618 oprnd1 = gimple_assign_rhs2 (last_stmt);
619 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
620 || !types_compatible_p (TREE_TYPE (oprnd1), type))
621 return NULL;
623 /* Check argument 0. */
624 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
625 &promotion)
626 || !promotion)
627 return NULL;
628 /* Check argument 1. */
629 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
630 &def_stmt1, &promotion);
632 if (op1_ok && promotion)
634 oprnd0 = gimple_assign_rhs1 (def_stmt0);
635 oprnd1 = gimple_assign_rhs1 (def_stmt1);
637 else
639 if (TREE_CODE (oprnd1) == INTEGER_CST
640 && TREE_CODE (half_type0) == INTEGER_TYPE
641 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
642 &oprnd0, stmts, type,
643 &half_type0, def_stmt0))
645 half_type1 = half_type0;
646 oprnd1 = fold_convert (half_type1, oprnd1);
648 else
649 return NULL;
652 /* Handle unsigned case. Look for
653 S6 u_prod_T = (unsigned TYPE) prod_T;
654 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
655 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
657 gimple use_stmt;
658 tree use_lhs;
659 tree use_type;
661 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
662 return NULL;
664 use_stmt = vect_single_imm_use (last_stmt);
665 if (!use_stmt || !is_gimple_assign (use_stmt)
666 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
667 return NULL;
669 use_lhs = gimple_assign_lhs (use_stmt);
670 use_type = TREE_TYPE (use_lhs);
671 if (!INTEGRAL_TYPE_P (use_type)
672 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
673 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
674 return NULL;
676 type = use_type;
677 last_stmt = use_stmt;
680 if (!types_compatible_p (half_type0, half_type1))
681 return NULL;
683 /* Pattern detected. */
684 if (dump_enabled_p ())
685 dump_printf_loc (MSG_NOTE, vect_location,
686 "vect_recog_widen_mult_pattern: detected:\n");
688 /* Check target support */
689 vectype = get_vectype_for_scalar_type (half_type0);
690 vectype_out = get_vectype_for_scalar_type (type);
691 if (!vectype
692 || !vectype_out
693 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
694 vectype_out, vectype,
695 &dummy_code, &dummy_code,
696 &dummy_int, &dummy_vec))
697 return NULL;
699 *type_in = vectype;
700 *type_out = vectype_out;
702 /* Pattern supported. Create a stmt to be used to replace the pattern: */
703 var = vect_recog_temp_ssa_var (type, NULL);
704 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
705 oprnd1);
707 if (dump_enabled_p ())
708 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
710 stmts->safe_push (last_stmt);
711 return pattern_stmt;
715 /* Function vect_recog_pow_pattern
717 Try to find the following pattern:
719 x = POW (y, N);
721 with POW being one of pow, powf, powi, powif and N being
722 either 2 or 0.5.
724 Input:
726 * LAST_STMT: A stmt from which the pattern search begins.
728 Output:
730 * TYPE_IN: The type of the input arguments to the pattern.
732 * TYPE_OUT: The type of the output of this pattern.
734 * Return value: A new stmt that will be used to replace the sequence of
735 stmts that constitute the pattern. In this case it will be:
736 x = x * x
737 or
738 x = sqrt (x) */
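/* Illustrative example, not from the original source (it assumes
   unsafe-math-style semantics for the sqrt replacement):

     for (int i = 0; i < n; i++)
       out[i] = pow (in[i], 2.0);    becomes  out[i] = in[i] * in[i];

     for (int i = 0; i < n; i++)
       out[i] = pow (in[i], 0.5);    becomes  out[i] = sqrt (in[i]);
*/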
741 static gimple
742 vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
743 tree *type_out)
745 gimple last_stmt = (*stmts)[0];
746 tree fn, base, exp = NULL;
747 gimple stmt;
748 tree var;
750 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
751 return NULL;
753 fn = gimple_call_fndecl (last_stmt);
754 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
755 return NULL;
757 switch (DECL_FUNCTION_CODE (fn))
759 case BUILT_IN_POWIF:
760 case BUILT_IN_POWI:
761 case BUILT_IN_POWF:
762 case BUILT_IN_POW:
763 base = gimple_call_arg (last_stmt, 0);
764 exp = gimple_call_arg (last_stmt, 1);
765 if (TREE_CODE (exp) != REAL_CST
766 && TREE_CODE (exp) != INTEGER_CST)
767 return NULL;
768 break;
770 default:
771 return NULL;
774 /* We now have a pow or powi builtin function call with a constant
775 exponent. */
777 *type_out = NULL_TREE;
779 /* Catch squaring. */
780 if ((host_integerp (exp, 0)
781 && tree_low_cst (exp, 0) == 2)
782 || (TREE_CODE (exp) == REAL_CST
783 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
785 *type_in = TREE_TYPE (base);
787 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
788 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
789 return stmt;
792 /* Catch square root. */
793 if (TREE_CODE (exp) == REAL_CST
794 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
796 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
797 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
798 if (*type_in)
800 gimple stmt = gimple_build_call (newfn, 1, base);
801 if (vectorizable_function (stmt, *type_in, *type_in)
802 != NULL_TREE)
804 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
805 gimple_call_set_lhs (stmt, var);
806 return stmt;
811 return NULL;
815 /* Function vect_recog_widen_sum_pattern
817 Try to find the following pattern:
819 type x_t;
820 TYPE x_T, sum = init;
821 loop:
822 sum_0 = phi <init, sum_1>
823 S1 x_t = *p;
824 S2 x_T = (TYPE) x_t;
825 S3 sum_1 = x_T + sum_0;
827 where type 'TYPE' is at least double the size of type 'type', i.e. we're
828 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
829 a special case of a reduction computation.
831 Input:
833 * LAST_STMT: A stmt from which the pattern search begins. In the example,
834 when this function is called with S3, the pattern {S2,S3} will be detected.
836 Output:
838 * TYPE_IN: The type of the input arguments to the pattern.
840 * TYPE_OUT: The type of the output of this pattern.
842 * Return value: A new stmt that will be used to replace the sequence of
843 stmts that constitute the pattern. In this case it will be:
844 WIDEN_SUM <x_t, sum_0>
846 Note: The widening-sum idiom is a widening reduction pattern that is
847 vectorized without preserving all the intermediate results. It
848 produces only N/2 (widened) results (by summing up pairs of
849 intermediate results) rather than all N results. Therefore, we
850 cannot allow this pattern when we want to get all the results and in
851 the correct order (as is the case when this computation is in an
852 inner-loop nested in an outer-loop that is being vectorized). */
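/* Illustrative example, not from the original source: a widening
   accumulation loop, assuming the target provides WIDEN_SUM_EXPR
   (e.g. a pairwise add of short lanes into int lanes):

     static int
     example_widen_sum (short *x, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x[i];
       return sum;
     }
*/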
854 static gimple
855 vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
856 tree *type_out)
858 gimple stmt, last_stmt = (*stmts)[0];
859 tree oprnd0, oprnd1;
860 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
861 tree type, half_type;
862 gimple pattern_stmt;
863 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
864 struct loop *loop;
865 tree var;
866 bool promotion;
868 if (!loop_info)
869 return NULL;
871 loop = LOOP_VINFO_LOOP (loop_info);
873 if (!is_gimple_assign (last_stmt))
874 return NULL;
876 type = gimple_expr_type (last_stmt);
878 /* Look for the following pattern
879 DX = (TYPE) X;
880 sum_1 = DX + sum_0;
881 In which DX is at least double the size of X, and sum_1 has been
882 recognized as a reduction variable. */
885 /* Starting from LAST_STMT, follow the defs of its uses in search
886 of the above pattern. */
888 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
889 return NULL;
891 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
892 return NULL;
894 oprnd0 = gimple_assign_rhs1 (last_stmt);
895 oprnd1 = gimple_assign_rhs2 (last_stmt);
896 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
897 || !types_compatible_p (TREE_TYPE (oprnd1), type))
898 return NULL;
900 /* So far so good. Since last_stmt was detected as a (summation) reduction,
901 we know that oprnd1 is the reduction variable (defined by a loop-header
902 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
903 Left to check that oprnd0 is defined by a cast from type 'type' to type
904 'TYPE'. */
906 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
907 &promotion)
908 || !promotion)
909 return NULL;
911 oprnd0 = gimple_assign_rhs1 (stmt);
912 *type_in = half_type;
913 *type_out = type;
915 /* Pattern detected. Create a stmt to be used to replace the pattern: */
916 var = vect_recog_temp_ssa_var (type, NULL);
917 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
918 oprnd0, oprnd1);
920 if (dump_enabled_p ())
922 dump_printf_loc (MSG_NOTE, vect_location,
923 "vect_recog_widen_sum_pattern: detected: ");
924 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
925 dump_printf (MSG_NOTE, "\n");
928 /* We don't allow changing the order of the computation in the inner-loop
929 when doing outer-loop vectorization. */
930 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
932 return pattern_stmt;
936 /* Return TRUE if the operation in STMT can be performed on a smaller type.
938 Input:
939 STMT - a statement to check.
940 DEF - we support operations with two operands, one of which is constant.
941 The other operand can be defined by a demotion operation, or by a
942 previous statement in a sequence of over-promoted operations. In the
943 latter case DEF is used to replace that operand. (It is defined by a
944 pattern statement we created for the previous statement in the
945 sequence).
947 Input/output:
948 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
949 NULL, it's the type of DEF.
950 STMTS - additional pattern statements. If a pattern statement (type
951 conversion) is created in this function, its original statement is
952 added to STMTS.
954 Output:
955 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
956 operands to use in the new pattern statement for STMT (will be created
957 in vect_recog_over_widening_pattern ()).
958 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
959 statements for STMT: the first one is a type promotion and the second
960 one is the operation itself. We return the type promotion statement
961 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
962 the second pattern statement. */
964 static bool
965 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
966 tree *op0, tree *op1, gimple *new_def_stmt,
967 vec<gimple> *stmts)
969 enum tree_code code;
970 tree const_oprnd, oprnd;
971 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
972 gimple def_stmt, new_stmt;
973 bool first = false;
974 bool promotion;
976 *op0 = NULL_TREE;
977 *op1 = NULL_TREE;
978 *new_def_stmt = NULL;
980 if (!is_gimple_assign (stmt))
981 return false;
983 code = gimple_assign_rhs_code (stmt);
984 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
985 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
986 return false;
988 oprnd = gimple_assign_rhs1 (stmt);
989 const_oprnd = gimple_assign_rhs2 (stmt);
990 type = gimple_expr_type (stmt);
992 if (TREE_CODE (oprnd) != SSA_NAME
993 || TREE_CODE (const_oprnd) != INTEGER_CST)
994 return false;
996 /* If oprnd has other uses besides that in stmt we cannot mark it
997 as being part of a pattern only. */
998 if (!has_single_use (oprnd))
999 return false;
1001 /* If we are in the middle of a sequence, we use DEF from a previous
1002 statement. Otherwise, OPRND has to be a result of type promotion. */
1003 if (*new_type)
1005 half_type = *new_type;
1006 oprnd = def;
1008 else
1010 first = true;
1011 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1012 &promotion)
1013 || !promotion
1014 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1015 return false;
1018 /* Can we perform the operation on a smaller type? */
1019 switch (code)
1021 case BIT_IOR_EXPR:
1022 case BIT_XOR_EXPR:
1023 case BIT_AND_EXPR:
1024 if (!int_fits_type_p (const_oprnd, half_type))
1026 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1027 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1028 return false;
1030 interm_type = build_nonstandard_integer_type (
1031 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1032 if (!int_fits_type_p (const_oprnd, interm_type))
1033 return false;
1036 break;
1038 case LSHIFT_EXPR:
1039 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1040 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1041 return false;
1043 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1044 (e.g., if the original value was char, the shift amount is at most 8
1045 if we want to use short). */
1046 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1047 return false;
1049 interm_type = build_nonstandard_integer_type (
1050 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1052 if (!vect_supportable_shift (code, interm_type))
1053 return false;
1055 break;
1057 case RSHIFT_EXPR:
1058 if (vect_supportable_shift (code, half_type))
1059 break;
1061 /* Try intermediate type - HALF_TYPE is not supported. */
1062 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1063 return false;
1065 interm_type = build_nonstandard_integer_type (
1066 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1068 if (!vect_supportable_shift (code, interm_type))
1069 return false;
1071 break;
1073 default:
1074 gcc_unreachable ();
1077 /* There are four possible cases:
1078 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1079 the first statement in the sequence)
1080 a. The original, HALF_TYPE, is not enough - we replace the promotion
1081 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1082 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1083 promotion.
1084 2. OPRND is defined by a pattern statement we created.
1085 a. Its type is not sufficient for the operation, we create a new stmt:
1086 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1087 this statement in NEW_DEF_STMT, and it is later put in
1088 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1089 b. OPRND is good to use in the new statement. */
1090 if (first)
1092 if (interm_type)
1094 /* Replace the original type conversion HALF_TYPE->TYPE with
1095 HALF_TYPE->INTERM_TYPE. */
1096 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1098 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1099 /* Check if the already created pattern stmt is what we need. */
1100 if (!is_gimple_assign (new_stmt)
1101 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1102 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1103 return false;
1105 stmts->safe_push (def_stmt);
1106 oprnd = gimple_assign_lhs (new_stmt);
1108 else
1110 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1111 oprnd = gimple_assign_rhs1 (def_stmt);
1112 new_oprnd = make_ssa_name (interm_type, NULL);
1113 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1114 oprnd, NULL_TREE);
1115 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1116 stmts->safe_push (def_stmt);
1117 oprnd = new_oprnd;
1120 else
1122 /* Retrieve the operand before the type promotion. */
1123 oprnd = gimple_assign_rhs1 (def_stmt);
1126 else
1128 if (interm_type)
1130 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1131 new_oprnd = make_ssa_name (interm_type, NULL);
1132 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1133 oprnd, NULL_TREE);
1134 oprnd = new_oprnd;
1135 *new_def_stmt = new_stmt;
1138 /* Otherwise, OPRND is already set. */
1141 if (interm_type)
1142 *new_type = interm_type;
1143 else
1144 *new_type = half_type;
1146 *op0 = oprnd;
1147 *op1 = fold_convert (*new_type, const_oprnd);
1149 return true;
1153 /* Try to find a statement or a sequence of statements that can be performed
1154 on a smaller type:
1156 type x_t;
1157 TYPE x_T, res0_T, res1_T;
1158 loop:
1159 S1 x_t = *p;
1160 S2 x_T = (TYPE) x_t;
1161 S3 res0_T = op (x_T, C0);
1162 S4 res1_T = op (res0_T, C1);
1163 S5 ... = () res1_T; - type demotion
1165 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1166 constants.
1167 Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
1168 be 'type' or some intermediate type. For now, we expect S5 to be a type
1169 demotion operation. We also check that S3 and S4 have only one use. */
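/* Illustrative example, not from the original source: unsigned char
   operands are promoted to int by the C front end and demoted again on
   the store, yet the shift and the bitwise or can be done in the
   narrow type:

     void
     example_over_widening (unsigned char *x, int n)
     {
       for (int i = 0; i < n; i++)
         x[i] = (x[i] >> 1) | 0x80;
     }
*/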
1171 static gimple
1172 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1173 tree *type_in, tree *type_out)
1175 gimple stmt = stmts->pop ();
1176 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1177 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1178 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1179 bool first;
1180 tree type = NULL;
1182 first = true;
1183 while (1)
1185 if (!vinfo_for_stmt (stmt)
1186 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1187 return NULL;
1189 new_def_stmt = NULL;
1190 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1191 &op0, &op1, &new_def_stmt,
1192 stmts))
1194 if (first)
1195 return NULL;
1196 else
1197 break;
1200 /* STMT can be performed on a smaller type. Check its uses. */
1201 use_stmt = vect_single_imm_use (stmt);
1202 if (!use_stmt || !is_gimple_assign (use_stmt))
1203 return NULL;
1205 /* Create pattern statement for STMT. */
1206 vectype = get_vectype_for_scalar_type (new_type);
1207 if (!vectype)
1208 return NULL;
1210 /* We want to collect all the statements for which we create pattern
1211 statements, except for the case when the last statement in the
1212 sequence doesn't have a corresponding pattern statement. In such a
1213 case we associate the last pattern statement with the last statement
1214 in the sequence. Therefore, we only add the original statement to
1215 the list if we know that it is not the last. */
1216 if (prev_stmt)
1217 stmts->safe_push (prev_stmt);
1219 var = vect_recog_temp_ssa_var (new_type, NULL);
1220 pattern_stmt
1221 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1222 op0, op1);
1223 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1224 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1226 if (dump_enabled_p ())
1228 dump_printf_loc (MSG_NOTE, vect_location,
1229 "created pattern stmt: ");
1230 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1231 dump_printf (MSG_NOTE, "\n");
1234 type = gimple_expr_type (stmt);
1235 prev_stmt = stmt;
1236 stmt = use_stmt;
1238 first = false;
1241 /* We got a sequence. We expect it to end with a type demotion operation.
1242 Otherwise, we quit (for now). There are three possible cases: the
1243 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1244 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1245 NEW_TYPE differs (we create a new conversion statement). */
1246 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1248 use_lhs = gimple_assign_lhs (use_stmt);
1249 use_type = TREE_TYPE (use_lhs);
1250 /* Support only type demotion or signedness change. */
1251 if (!INTEGRAL_TYPE_P (use_type)
1252 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1253 return NULL;
1255 /* Check that NEW_TYPE is not bigger than the conversion result. */
1256 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1257 return NULL;
1259 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1260 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1262 /* Create NEW_TYPE->USE_TYPE conversion. */
1263 new_oprnd = make_ssa_name (use_type, NULL);
1264 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1265 var, NULL_TREE);
1266 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1268 *type_in = get_vectype_for_scalar_type (new_type);
1269 *type_out = get_vectype_for_scalar_type (use_type);
1271 /* We created a pattern statement for the last statement in the
1272 sequence, so we don't need to associate it with the pattern
1273 statement created for PREV_STMT. Therefore, we add PREV_STMT
1274 to the list in order to mark it later in vect_pattern_recog_1. */
1275 if (prev_stmt)
1276 stmts->safe_push (prev_stmt);
1278 else
1280 if (prev_stmt)
1281 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1282 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1284 *type_in = vectype;
1285 *type_out = NULL_TREE;
1288 stmts->safe_push (use_stmt);
1290 else
1291 /* TODO: support general case, create a conversion to the correct type. */
1292 return NULL;
1294 /* Pattern detected. */
1295 if (dump_enabled_p ())
1297 dump_printf_loc (MSG_NOTE, vect_location,
1298 "vect_recog_over_widening_pattern: detected: ");
1299 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1300 dump_printf (MSG_NOTE, "\n");
1303 return pattern_stmt;
1306 /* Detect widening shift pattern:
1308 type a_t;
1309 TYPE a_T, res_T;
1311 S1 a_t = ;
1312 S2 a_T = (TYPE) a_t;
1313 S3 res_T = a_T << CONST;
1315 where type 'TYPE' is at least double the size of type 'type'.
1317 Also detect cases where the shift result is immediately converted
1318 to another type 'result_type' that is no larger in size than 'TYPE'.
1319 In those cases we perform a widen-shift that directly results in
1320 'result_type', to avoid a possible over-widening situation:
1322 type a_t;
1323 TYPE a_T, res_T;
1324 result_type res_result;
1326 S1 a_t = ;
1327 S2 a_T = (TYPE) a_t;
1328 S3 res_T = a_T << CONST;
1329 S4 res_result = (result_type) res_T;
1330 '--> res_result' = a_t w<< CONST;
1332 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1333 create an additional pattern stmt for S2 to create a variable of an
1334 intermediate type, and perform widen-shift on the intermediate type:
1336 type a_t;
1337 interm_type a_it;
1338 TYPE a_T, res_T, res_T';
1340 S1 a_t = ;
1341 S2 a_T = (TYPE) a_t;
1342 '--> a_it = (interm_type) a_t;
1343 S3 res_T = a_T << CONST;
1344 '--> res_T' = a_it <<* CONST;
1346 Input/Output:
1348 * STMTS: Contains a stmt from which the pattern search begins.
1349 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1350 in STMTS. When an intermediate type is used and a pattern statement is
1351 created for S2, we also put S2 here (before S3).
1353 Output:
1355 * TYPE_IN: The type of the input arguments to the pattern.
1357 * TYPE_OUT: The type of the output of this pattern.
1359 * Return value: A new stmt that will be used to replace the sequence of
1360 stmts that constitute the pattern. In this case it will be:
1361 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
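/* Illustrative example, not from the original source: the promotion of
   s[i] to int followed by the shift becomes a single widening shift,
   assuming the target supports WIDEN_LSHIFT_EXPR (e.g. vshll on ARM):

     void
     example_widen_shift (int *restrict d, short *restrict s, int n)
     {
       for (int i = 0; i < n; i++)
         d[i] = s[i] << 8;
     }
*/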
1363 static gimple
1364 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1365 tree *type_in, tree *type_out)
1367 gimple last_stmt = stmts->pop ();
1368 gimple def_stmt0;
1369 tree oprnd0, oprnd1;
1370 tree type, half_type0;
1371 gimple pattern_stmt;
1372 tree vectype, vectype_out = NULL_TREE;
1373 tree var;
1374 enum tree_code dummy_code;
1375 int dummy_int;
1376 vec<tree> dummy_vec;
1377 gimple use_stmt;
1378 bool promotion;
1380 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1381 return NULL;
1383 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1384 return NULL;
1386 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1387 return NULL;
1389 oprnd0 = gimple_assign_rhs1 (last_stmt);
1390 oprnd1 = gimple_assign_rhs2 (last_stmt);
1391 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1392 return NULL;
1394 /* Check operand 0: it has to be defined by a type promotion. */
1395 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1396 &promotion)
1397 || !promotion)
1398 return NULL;
1400 /* Check operand 1: has to be positive. We check that it fits the type
1401 in vect_handle_widen_op_by_const (). */
1402 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1403 return NULL;
1405 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1406 type = gimple_expr_type (last_stmt);
1408 /* Check for subsequent conversion to another type. */
1409 use_stmt = vect_single_imm_use (last_stmt);
1410 if (use_stmt && is_gimple_assign (use_stmt)
1411 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1412 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1414 tree use_lhs = gimple_assign_lhs (use_stmt);
1415 tree use_type = TREE_TYPE (use_lhs);
1417 if (INTEGRAL_TYPE_P (use_type)
1418 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1420 last_stmt = use_stmt;
1421 type = use_type;
1425 /* Check if this is a widening operation. */
1426 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1427 &oprnd0, stmts,
1428 type, &half_type0, def_stmt0))
1429 return NULL;
1431 /* Pattern detected. */
1432 if (dump_enabled_p ())
1433 dump_printf_loc (MSG_NOTE, vect_location,
1434 "vect_recog_widen_shift_pattern: detected:\n");
1436 /* Check target support. */
1437 vectype = get_vectype_for_scalar_type (half_type0);
1438 vectype_out = get_vectype_for_scalar_type (type);
1440 if (!vectype
1441 || !vectype_out
1442 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1443 vectype_out, vectype,
1444 &dummy_code, &dummy_code,
1445 &dummy_int, &dummy_vec))
1446 return NULL;
1448 *type_in = vectype;
1449 *type_out = vectype_out;
1451 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1452 var = vect_recog_temp_ssa_var (type, NULL);
1453 pattern_stmt =
1454 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1456 if (dump_enabled_p ())
1457 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1459 stmts->safe_push (last_stmt);
1460 return pattern_stmt;
1463 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1465 type a_t, b_t, c_t;
1467 S0 a_t = b_t r<< c_t;
1469 Input/Output:
1471 * STMTS: Contains a stmt from which the pattern search begins,
1472 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1473 with a sequence:
1475 S1 d_t = -c_t;
1476 S2 e_t = d_t & (B - 1);
1477 S3 f_t = b_t << c_t;
1478 S4 g_t = b_t >> e_t;
1479 S0 a_t = f_t | g_t;
1481 where B is element bitsize of type.
1483 Output:
1485 * TYPE_IN: The type of the input arguments to the pattern.
1487 * TYPE_OUT: The type of the output of this pattern.
1489 * Return value: A new stmt that will be used to replace the rotate
1490 S0 stmt. */
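/* Illustrative example, not from the original source: the portable
   rotate idiom below (assuming 32-bit unsigned int) is folded to
   LROTATE_EXPR by the front end, and this function expands it back
   into shifts for targets with vector shifts but no vector rotate:

     void
     example_rotate (unsigned int *x, unsigned int r, int n)
     {
       for (int i = 0; i < n; i++)
         x[i] = (x[i] << r) | (x[i] >> ((-r) & 31));
     }
*/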
1492 static gimple
1493 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1495 gimple last_stmt = stmts->pop ();
1496 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1497 gimple pattern_stmt, def_stmt;
1498 enum tree_code rhs_code;
1499 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1500 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1501 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1502 enum vect_def_type dt;
1503 optab optab1, optab2;
1504 edge ext_def = NULL;
1506 if (!is_gimple_assign (last_stmt))
1507 return NULL;
1509 rhs_code = gimple_assign_rhs_code (last_stmt);
1510 switch (rhs_code)
1512 case LROTATE_EXPR:
1513 case RROTATE_EXPR:
1514 break;
1515 default:
1516 return NULL;
1519 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1520 return NULL;
1522 lhs = gimple_assign_lhs (last_stmt);
1523 oprnd0 = gimple_assign_rhs1 (last_stmt);
1524 type = TREE_TYPE (oprnd0);
1525 oprnd1 = gimple_assign_rhs2 (last_stmt);
1526 if (TREE_CODE (oprnd0) != SSA_NAME
1527 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1528 || !INTEGRAL_TYPE_P (type)
1529 || !TYPE_UNSIGNED (type))
1530 return NULL;
1532 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1533 &def, &dt))
1534 return NULL;
1536 if (dt != vect_internal_def
1537 && dt != vect_constant_def
1538 && dt != vect_external_def)
1539 return NULL;
1541 vectype = get_vectype_for_scalar_type (type);
1542 if (vectype == NULL_TREE)
1543 return NULL;
1545 /* If vector/vector or vector/scalar rotate is supported by the target,
1546 don't do anything here. */
1547 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1548 if (optab1
1549 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1550 return NULL;
1552 if (bb_vinfo != NULL || dt != vect_internal_def)
1554 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1555 if (optab2
1556 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1557 return NULL;
1560 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1561 don't do anything here either. */
1562 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1563 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1564 if (!optab1
1565 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1566 || !optab2
1567 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1569 if (bb_vinfo == NULL && dt == vect_internal_def)
1570 return NULL;
1571 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1572 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1573 if (!optab1
1574 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1575 || !optab2
1576 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1577 return NULL;
1580 *type_in = vectype;
1581 *type_out = vectype;
1582 if (*type_in == NULL_TREE)
1583 return NULL;
1585 if (dt == vect_external_def
1586 && TREE_CODE (oprnd1) == SSA_NAME
1587 && loop_vinfo)
1589 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1590 ext_def = loop_preheader_edge (loop);
1591 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1593 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1594 if (bb == NULL
1595 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1596 ext_def = NULL;
1600 def = NULL_TREE;
1601 if (TREE_CODE (oprnd1) == INTEGER_CST
1602 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1603 def = oprnd1;
1604 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1606 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1607 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1608 && TYPE_PRECISION (TREE_TYPE (rhs1))
1609 == TYPE_PRECISION (type))
1610 def = rhs1;
1613 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1614 if (def == NULL_TREE)
1616 def = vect_recog_temp_ssa_var (type, NULL);
1617 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1618 NULL_TREE);
1619 if (ext_def)
1621 basic_block new_bb
1622 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1623 gcc_assert (!new_bb);
1625 else
1626 append_pattern_def_seq (stmt_vinfo, def_stmt);
1628 stype = TREE_TYPE (def);
1630 if (TREE_CODE (def) == INTEGER_CST)
1632 if (!host_integerp (def, 1)
1633 || (unsigned HOST_WIDE_INT) tree_low_cst (def, 1)
1634 >= GET_MODE_PRECISION (TYPE_MODE (type))
1635 || integer_zerop (def))
1636 return NULL;
1637 def2 = build_int_cst (stype,
1638 GET_MODE_PRECISION (TYPE_MODE (type))
1639 - tree_low_cst (def, 1));
1641 else
1643 tree vecstype = get_vectype_for_scalar_type (stype);
1644 stmt_vec_info def_stmt_vinfo;
1646 if (vecstype == NULL_TREE)
1647 return NULL;
1648 def2 = vect_recog_temp_ssa_var (stype, NULL);
1649 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
1650 NULL_TREE);
1651 if (ext_def)
1653 basic_block new_bb
1654 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1655 gcc_assert (!new_bb);
1657 else
1659 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1660 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1661 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1662 append_pattern_def_seq (stmt_vinfo, def_stmt);
1665 def2 = vect_recog_temp_ssa_var (stype, NULL);
1666 tree mask
1667 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1668 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1669 gimple_assign_lhs (def_stmt),
1670 mask);
1671 if (ext_def)
1673 basic_block new_bb
1674 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1675 gcc_assert (!new_bb);
1677 else
1679 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1680 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1681 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1682 append_pattern_def_seq (stmt_vinfo, def_stmt);
1686 var1 = vect_recog_temp_ssa_var (type, NULL);
1687 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1688 ? LSHIFT_EXPR : RSHIFT_EXPR,
1689 var1, oprnd0, def);
1690 append_pattern_def_seq (stmt_vinfo, def_stmt);
1692 var2 = vect_recog_temp_ssa_var (type, NULL);
1693 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1694 ? RSHIFT_EXPR : LSHIFT_EXPR,
1695 var2, oprnd0, def2);
1696 append_pattern_def_seq (stmt_vinfo, def_stmt);
1698 /* Pattern detected. */
1699 if (dump_enabled_p ())
1700 dump_printf_loc (MSG_NOTE, vect_location,
1701 "vect_recog_rotate_pattern: detected:\n");
1703 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1704 var = vect_recog_temp_ssa_var (type, NULL);
1705 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
1707 if (dump_enabled_p ())
1708 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1710 stmts->safe_push (last_stmt);
1711 return pattern_stmt;
1714 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1715 vectorized:
1717 type a_t;
1718 TYPE b_T, res_T;
1720 S1 a_t = ;
1721 S2 b_T = ;
1722 S3 res_T = b_T op a_t;
1724 where type 'TYPE' has a different size than type 'type',
1725 and op is <<, >> or rotate.
1727 Also detect cases:
1729 type a_t;
1730 TYPE b_T, c_T, res_T;
1732 S0 c_T = ;
1733 S1 a_t = (type) c_T;
1734 S2 b_T = ;
1735 S3 res_T = b_T op a_t;
1737 Input/Output:
1739 * STMTS: Contains a stmt from which the pattern search begins,
1740 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1741 with a shift/rotate which has same type on both operands, in the
1742 second case just b_T op c_T, in the first case with added cast
1743 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1745 Output:
1747 * TYPE_IN: The type of the input arguments to the pattern.
1749 * TYPE_OUT: The type of the output of this pattern.
1751 * Return value: A new stmt that will be used to replace the shift/rotate
1752 S3 stmt. */
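/* Illustrative example, not from the original source: the shift count
   is loaded in a narrower type than the shifted value, so the operands
   have different modes until the pattern inserts the cast:

     void
     example_vv_shift (long long *restrict x, int *restrict s, int n)
     {
       for (int i = 0; i < n; i++)
         x[i] = x[i] << s[i];
     }
*/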
1754 static gimple
1755 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
1756 tree *type_in, tree *type_out)
1758 gimple last_stmt = stmts->pop ();
1759 tree oprnd0, oprnd1, lhs, var;
1760 gimple pattern_stmt, def_stmt;
1761 enum tree_code rhs_code;
1762 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1763 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1764 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1765 enum vect_def_type dt;
1766 tree def;
1768 if (!is_gimple_assign (last_stmt))
1769 return NULL;
1771 rhs_code = gimple_assign_rhs_code (last_stmt);
1772 switch (rhs_code)
1774 case LSHIFT_EXPR:
1775 case RSHIFT_EXPR:
1776 case LROTATE_EXPR:
1777 case RROTATE_EXPR:
1778 break;
1779 default:
1780 return NULL;
1783 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1784 return NULL;
1786 lhs = gimple_assign_lhs (last_stmt);
1787 oprnd0 = gimple_assign_rhs1 (last_stmt);
1788 oprnd1 = gimple_assign_rhs2 (last_stmt);
1789 if (TREE_CODE (oprnd0) != SSA_NAME
1790 || TREE_CODE (oprnd1) != SSA_NAME
1791 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1792 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1793 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1794 || TYPE_PRECISION (TREE_TYPE (lhs))
1795 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1796 return NULL;
1798 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1799 &def, &dt))
1800 return NULL;
1802 if (dt != vect_internal_def)
1803 return NULL;
1805 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1806 *type_out = *type_in;
1807 if (*type_in == NULL_TREE)
1808 return NULL;
1810 def = NULL_TREE;
1811 if (gimple_assign_cast_p (def_stmt))
1813 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1814 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1815 && TYPE_PRECISION (TREE_TYPE (rhs1))
1816 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1817 def = rhs1;
1820 if (def == NULL_TREE)
1822 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1823 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1824 NULL_TREE);
1825 new_pattern_def_seq (stmt_vinfo, def_stmt);
1828 /* Pattern detected. */
1829 if (dump_enabled_p ())
1830 dump_printf_loc (MSG_NOTE, vect_location,
1831 "vect_recog_vector_vector_shift_pattern: detected:\n");
1833 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1834 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1835 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1837 if (dump_enabled_p ())
1838 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1840 stmts->safe_push (last_stmt);
1841 return pattern_stmt;
1844 /* Detect a signed division by a constant that wouldn't be
1845 otherwise vectorized:
1847 type a_t, b_t;
1849 S1 a_t = b_t / N;
1851 where type 'type' is an integral type and N is a constant.
1853 Similarly handle modulo by a constant:
1855 S4 a_t = b_t % N;
1857 Input/Output:
1859 * STMTS: Contains a stmt from which the pattern search begins,
1860 i.e. the division stmt. If N is a power of two constant and the
1861 type is signed, S1 is replaced by:
1862 S3 y_t = b_t < 0 ? N - 1 : 0;
1863 S2 x_t = b_t + y_t;
1864 S1' a_t = x_t >> log2 (N);
1866 If N is a power of two constant and the type is signed, S4 is
1867 replaced by (where *_T temporaries have unsigned type):
1868 S9 y_T = b_t < 0 ? -1U : 0U;
1869 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1870 S7 z_t = (type) z_T;
1871 S6 w_t = b_t + z_t;
1872 S5 x_t = w_t & (N - 1);
1873 S4' a_t = x_t - z_t;
1875 Output:
1877 * TYPE_IN: The type of the input arguments to the pattern.
1879 * TYPE_OUT: The type of the output of this pattern.
1881 * Return value: A new stmt that will be used to replace the division
1882 S1 or modulo S4 stmt. */
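/* Illustrative example, not from the original source: a signed division
   by a power of two that is rewritten into the branchless sequence
   shown above:

     void
     example_div_pow2 (int *x, int n)
     {
       for (int i = 0; i < n; i++)
         x[i] = x[i] / 4;
     }

   Here S1 x_t / 4 becomes (x_t + (x_t < 0 ? 3 : 0)) >> 2. */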
1884 static gimple
1885 vect_recog_divmod_pattern (vec<gimple> *stmts,
1886 tree *type_in, tree *type_out)
1888 gimple last_stmt = stmts->pop ();
1889 tree oprnd0, oprnd1, vectype, itype, cond;
1890 gimple pattern_stmt, def_stmt;
1891 enum tree_code rhs_code;
1892 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1893 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1894 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1895 optab optab;
1896 tree q;
1897 int dummy_int, prec;
1898 stmt_vec_info def_stmt_vinfo;
1900 if (!is_gimple_assign (last_stmt))
1901 return NULL;
1903 rhs_code = gimple_assign_rhs_code (last_stmt);
1904 switch (rhs_code)
1906 case TRUNC_DIV_EXPR:
1907 case TRUNC_MOD_EXPR:
1908 break;
1909 default:
1910 return NULL;
1913 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1914 return NULL;
1916 oprnd0 = gimple_assign_rhs1 (last_stmt);
1917 oprnd1 = gimple_assign_rhs2 (last_stmt);
1918 itype = TREE_TYPE (oprnd0);
1919 if (TREE_CODE (oprnd0) != SSA_NAME
1920 || TREE_CODE (oprnd1) != INTEGER_CST
1921 || TREE_CODE (itype) != INTEGER_TYPE
1922 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1923 return NULL;
1925 vectype = get_vectype_for_scalar_type (itype);
1926 if (vectype == NULL_TREE)
1927 return NULL;
1929 /* If the target can handle vectorized division or modulo natively,
1930 don't attempt to optimize this. */
1931 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1932 if (optab != unknown_optab)
1934 enum machine_mode vec_mode = TYPE_MODE (vectype);
1935 int icode = (int) optab_handler (optab, vec_mode);
1936 if (icode != CODE_FOR_nothing)
1937 return NULL;
1940 prec = TYPE_PRECISION (itype);
1941 if (integer_pow2p (oprnd1))
1943 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1944 return NULL;
1946 /* Pattern detected. */
1947 if (dump_enabled_p ())
1948 dump_printf_loc (MSG_NOTE, vect_location,
1949 "vect_recog_divmod_pattern: detected:\n");
1951 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1952 build_int_cst (itype, 0));
1953 if (rhs_code == TRUNC_DIV_EXPR)
1955 tree var = vect_recog_temp_ssa_var (itype, NULL);
1956 tree shift;
1957 def_stmt
1958 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1959 fold_build2 (MINUS_EXPR, itype,
1960 oprnd1,
1961 build_int_cst (itype,
1962 1)),
1963 build_int_cst (itype, 0));
1964 new_pattern_def_seq (stmt_vinfo, def_stmt);
1965 var = vect_recog_temp_ssa_var (itype, NULL);
1966 def_stmt
1967 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1968 gimple_assign_lhs (def_stmt));
1969 append_pattern_def_seq (stmt_vinfo, def_stmt);
1971 shift = build_int_cst (itype, tree_log2 (oprnd1));
1972 pattern_stmt
1973 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1974 vect_recog_temp_ssa_var (itype,
1975 NULL),
1976 var, shift);
1978 else
1980 tree signmask;
1981 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1982 if (compare_tree_int (oprnd1, 2) == 0)
1984 signmask = vect_recog_temp_ssa_var (itype, NULL);
1985 def_stmt
1986 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1987 build_int_cst (itype, 1),
1988 build_int_cst (itype, 0));
1989 append_pattern_def_seq (stmt_vinfo, def_stmt);
1991 else
1993 tree utype
1994 = build_nonstandard_integer_type (prec, 1);
1995 tree vecutype = get_vectype_for_scalar_type (utype);
1996 tree shift
1997 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1998 - tree_log2 (oprnd1));
1999 tree var = vect_recog_temp_ssa_var (utype, NULL);
2001 def_stmt
2002 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2003 build_int_cst (utype, -1),
2004 build_int_cst (utype, 0));
2005 def_stmt_vinfo
2006 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2007 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2008 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2009 append_pattern_def_seq (stmt_vinfo, def_stmt);
2010 var = vect_recog_temp_ssa_var (utype, NULL);
2011 def_stmt
2012 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2013 gimple_assign_lhs (def_stmt),
2014 shift);
2015 def_stmt_vinfo
2016 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2017 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2018 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2019 append_pattern_def_seq (stmt_vinfo, def_stmt);
2020 signmask = vect_recog_temp_ssa_var (itype, NULL);
2021 def_stmt
2022 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2023 NULL_TREE);
2024 append_pattern_def_seq (stmt_vinfo, def_stmt);
2026 def_stmt
2027 = gimple_build_assign_with_ops (PLUS_EXPR,
2028 vect_recog_temp_ssa_var (itype,
2029 NULL),
2030 oprnd0, signmask);
2031 append_pattern_def_seq (stmt_vinfo, def_stmt);
2032 def_stmt
2033 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2034 vect_recog_temp_ssa_var (itype,
2035 NULL),
2036 gimple_assign_lhs (def_stmt),
2037 fold_build2 (MINUS_EXPR, itype,
2038 oprnd1,
2039 build_int_cst (itype,
2040 1)));
2041 append_pattern_def_seq (stmt_vinfo, def_stmt);
2043 pattern_stmt
2044 = gimple_build_assign_with_ops (MINUS_EXPR,
2045 vect_recog_temp_ssa_var (itype,
2046 NULL),
2047 gimple_assign_lhs (def_stmt),
2048 signmask);
2051 if (dump_enabled_p ())
2052 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2055 stmts->safe_push (last_stmt);
2057 *type_in = vectype;
2058 *type_out = vectype;
2059 return pattern_stmt;
2062 if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
2063 || integer_zerop (oprnd1)
2064 || prec > HOST_BITS_PER_WIDE_INT)
2065 return NULL;
2067 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2068 return NULL;
2070 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2072 if (TYPE_UNSIGNED (itype))
2074 unsigned HOST_WIDE_INT mh, ml;
2075 int pre_shift, post_shift;
2076 unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
2077 & GET_MODE_MASK (TYPE_MODE (itype));
2078 tree t1, t2, t3, t4;
2080 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2081 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2082 return NULL;
2084 /* Find a suitable multiplier and right shift count
2085 instead of multiplying with D. */
2086 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2088 /* If the suggested multiplier is more than PREC bits, we can do better
2089 for even divisors, using an initial right shift. */
2090 if (mh != 0 && (d & 1) == 0)
2092 pre_shift = floor_log2 (d & -d);
2093 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2094 &ml, &post_shift, &dummy_int);
2095 gcc_assert (!mh);
2097 else
2098 pre_shift = 0;
2100 if (mh != 0)
2102 if (post_shift - 1 >= prec)
2103 return NULL;
2105 /* t1 = oprnd0 h* ml;
2106 t2 = oprnd0 - t1;
2107 t3 = t2 >> 1;
2108 t4 = t1 + t3;
2109 q = t4 >> (post_shift - 1); */
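/* Worked sketch (illustrative, using the well-known magic constants
   for unsigned 32-bit division by 7): with prec = 32 and d = 7,
   choose_multiplier returns ml = 0x24924925 and post_shift = 3.
   For oprnd0 = 100: t1 = (100 * ml) >> 32 = 14, t2 = 86, t3 = 43,
   t4 = 57, and q = 57 >> 2 = 14 = 100 / 7.  */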
2110 t1 = vect_recog_temp_ssa_var (itype, NULL);
2111 def_stmt
2112 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2113 build_int_cst (itype, ml));
2114 append_pattern_def_seq (stmt_vinfo, def_stmt);
2116 t2 = vect_recog_temp_ssa_var (itype, NULL);
2117 def_stmt
2118 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2119 append_pattern_def_seq (stmt_vinfo, def_stmt);
2121 t3 = vect_recog_temp_ssa_var (itype, NULL);
2122 def_stmt
2123 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2124 integer_one_node);
2125 append_pattern_def_seq (stmt_vinfo, def_stmt);
2127 t4 = vect_recog_temp_ssa_var (itype, NULL);
2128 def_stmt
2129 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2131 if (post_shift != 1)
2133 append_pattern_def_seq (stmt_vinfo, def_stmt);
2135 q = vect_recog_temp_ssa_var (itype, NULL);
2136 pattern_stmt
2137 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2138 build_int_cst (itype,
2139 post_shift
2140 - 1));
2142 else
2144 q = t4;
2145 pattern_stmt = def_stmt;
2148 else
2150 if (pre_shift >= prec || post_shift >= prec)
2151 return NULL;
2153 /* t1 = oprnd0 >> pre_shift;
2154 t2 = t1 h* ml;
2155 q = t2 >> post_shift; */
2156 if (pre_shift)
2158 t1 = vect_recog_temp_ssa_var (itype, NULL);
2159 def_stmt
2160 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2161 build_int_cst (NULL,
2162 pre_shift));
2163 append_pattern_def_seq (stmt_vinfo, def_stmt);
2165 else
2166 t1 = oprnd0;
2168 t2 = vect_recog_temp_ssa_var (itype, NULL);
2169 def_stmt
2170 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2171 build_int_cst (itype, ml));
2173 if (post_shift)
2175 append_pattern_def_seq (stmt_vinfo, def_stmt);
2177 q = vect_recog_temp_ssa_var (itype, NULL);
2178 def_stmt
2179 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2180 build_int_cst (itype,
2181 post_shift));
2183 else
2184 q = t2;
2186 pattern_stmt = def_stmt;
2189 else
2191 unsigned HOST_WIDE_INT ml;
2192 int post_shift;
2193 HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
2194 unsigned HOST_WIDE_INT abs_d;
2195 bool add = false;
2196 tree t1, t2, t3, t4;
2198 /* Give up for -1. */
2199 if (d == -1)
2200 return NULL;
2202 /* Since d might be INT_MIN, we have to cast to
2203 unsigned HOST_WIDE_INT before negating to avoid
2204 undefined signed overflow. */
2205 abs_d = (d >= 0
2206 ? (unsigned HOST_WIDE_INT) d
2207 : - (unsigned HOST_WIDE_INT) d);
2209 /* n rem d = n rem -d */
2210 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2212 d = abs_d;
2213 oprnd1 = build_int_cst (itype, abs_d);
2215 else if (HOST_BITS_PER_WIDE_INT >= prec
2216 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2217 /* This case is not handled correctly below. */
2218 return NULL;
2220 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2221 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2223 add = true;
2224 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2226 if (post_shift >= prec)
2227 return NULL;
2229 /* t1 = oprnd0 h* ml; */
2230 t1 = vect_recog_temp_ssa_var (itype, NULL);
2231 def_stmt
2232 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2233 build_int_cst (itype, ml));
2234 append_pattern_def_seq (stmt_vinfo, def_stmt);
2236 if (add)
2238 /* t2 = t1 + oprnd0; */
2239 t2 = vect_recog_temp_ssa_var (itype, NULL);
2240 def_stmt
2241 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2242 append_pattern_def_seq (stmt_vinfo, def_stmt);
2244 else
2245 t2 = t1;
2247 if (post_shift)
2249 /* t3 = t2 >> post_shift; */
2250 t3 = vect_recog_temp_ssa_var (itype, NULL);
2251 def_stmt
2252 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2253 build_int_cst (itype, post_shift));
2254 append_pattern_def_seq (stmt_vinfo, def_stmt);
2256 else
2257 t3 = t2;
2259 /* t4 = oprnd0 >> (prec - 1); */
2260 t4 = vect_recog_temp_ssa_var (itype, NULL);
2261 def_stmt
2262 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2263 build_int_cst (itype, prec - 1));
2264 append_pattern_def_seq (stmt_vinfo, def_stmt);
2266 /* q = t3 - t4; or q = t4 - t3; */
2267 q = vect_recog_temp_ssa_var (itype, NULL);
2268 pattern_stmt
2269 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2270 d < 0 ? t3 : t4);
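/* Worked sketch (illustrative, using the well-known magic constants
   for signed 32-bit division by 3): with prec = 32 and d = 3,
   choose_multiplier returns ml = 0x55555556 with post_shift = 0, so
   add is false.  For oprnd0 = -7: t1 = t2 = t3 = (-7 * ml) >> 32 = -3,
   t4 = -7 >> 31 = -1, and q = t3 - t4 = -2 = trunc (-7 / 3).  */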
2273 if (rhs_code == TRUNC_MOD_EXPR)
2275 tree r, t1;
2277 /* We divided. Now finish by:
2278 t1 = q * oprnd1;
2279 r = oprnd0 - t1; */
2280 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2282 t1 = vect_recog_temp_ssa_var (itype, NULL);
2283 def_stmt
2284 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2285 append_pattern_def_seq (stmt_vinfo, def_stmt);
2287 r = vect_recog_temp_ssa_var (itype, NULL);
2288 pattern_stmt
2289 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2292 /* Pattern detected. */
2293 if (dump_enabled_p ())
2295 dump_printf_loc (MSG_NOTE, vect_location,
2296 "vect_recog_divmod_pattern: detected: ");
2297 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2298 dump_printf (MSG_NOTE, "\n");
2301 stmts->safe_push (last_stmt);
2303 *type_in = vectype;
2304 *type_out = vectype;
2305 return pattern_stmt;
2308 /* Function vect_recog_mixed_size_cond_pattern
2310 Try to find the following pattern:
2312 type x_t, y_t;
2313 TYPE a_T, b_T, c_T;
2314 loop:
2315 S1 a_T = x_t CMP y_t ? b_T : c_T;
2317 where type 'TYPE' is an integral type which has a different size
2318 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2319 than 'type', the constants need to fit into an integer type
2320 with the same width as 'type') or results of conversion from 'type'.
2322 Input:
2324 * LAST_STMT: A stmt from which the pattern search begins.
2326 Output:
2328 * TYPE_IN: The type of the input arguments to the pattern.
2330 * TYPE_OUT: The type of the output of this pattern.
2332 * Return value: A new stmt that will be used to replace the pattern.
2333 Additionally, a def_stmt is added:
2335 a_it = x_t CMP y_t ? b_it : c_it;
2336 a_T = (TYPE) a_it; */
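/* Illustrative sketch (hypothetical source, not from the original
   comment): for

     short x_t, y_t;  int a_T;
     a_T = x_t < y_t ? 3 : 5;

   the pattern computes the COND_EXPR in a short-sized integer type
   and widens afterwards:

     a_it = x_t < y_t ? 3 : 5;
     a_T = (int) a_it;  */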
2338 static gimple
2339 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2340 tree *type_out)
2342 gimple last_stmt = (*stmts)[0];
2343 tree cond_expr, then_clause, else_clause;
2344 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2345 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2346 enum machine_mode cmpmode;
2347 gimple pattern_stmt, def_stmt;
2348 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2349 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2350 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2351 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2352 bool promotion;
2353 tree comp_scalar_type;
2355 if (!is_gimple_assign (last_stmt)
2356 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2357 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2358 return NULL;
2360 cond_expr = gimple_assign_rhs1 (last_stmt);
2361 then_clause = gimple_assign_rhs2 (last_stmt);
2362 else_clause = gimple_assign_rhs3 (last_stmt);
2364 if (!COMPARISON_CLASS_P (cond_expr))
2365 return NULL;
2367 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2368 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2369 if (comp_vectype == NULL_TREE)
2370 return NULL;
2372 type = gimple_expr_type (last_stmt);
2373 if (types_compatible_p (type, comp_scalar_type)
2374 || ((TREE_CODE (then_clause) != INTEGER_CST
2375 || TREE_CODE (else_clause) != INTEGER_CST)
2376 && !INTEGRAL_TYPE_P (comp_scalar_type))
2377 || !INTEGRAL_TYPE_P (type))
2378 return NULL;
2380 if ((TREE_CODE (then_clause) != INTEGER_CST
2381 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2382 &def_stmt0, &promotion))
2383 || (TREE_CODE (else_clause) != INTEGER_CST
2384 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2385 &def_stmt1, &promotion)))
2386 return NULL;
2388 if (orig_type0 && orig_type1
2389 && !types_compatible_p (orig_type0, orig_type1))
2390 return NULL;
2392 if (orig_type0)
2394 if (!types_compatible_p (orig_type0, comp_scalar_type))
2395 return NULL;
2396 then_clause = gimple_assign_rhs1 (def_stmt0);
2397 itype = orig_type0;
2400 if (orig_type1)
2402 if (!types_compatible_p (orig_type1, comp_scalar_type))
2403 return NULL;
2404 else_clause = gimple_assign_rhs1 (def_stmt1);
2405 itype = orig_type1;
2408 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2410 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2411 return NULL;
2413 vectype = get_vectype_for_scalar_type (type);
2414 if (vectype == NULL_TREE)
2415 return NULL;
2417 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2418 return NULL;
2420 if (itype == NULL_TREE)
2421 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2422 TYPE_UNSIGNED (type));
2424 if (itype == NULL_TREE
2425 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2426 return NULL;
2428 vecitype = get_vectype_for_scalar_type (itype);
2429 if (vecitype == NULL_TREE)
2430 return NULL;
2432 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2433 return NULL;
2435 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2437 if ((TREE_CODE (then_clause) == INTEGER_CST
2438 && !int_fits_type_p (then_clause, itype))
2439 || (TREE_CODE (else_clause) == INTEGER_CST
2440 && !int_fits_type_p (else_clause, itype)))
2441 return NULL;
2444 def_stmt
2445 = gimple_build_assign_with_ops (COND_EXPR,
2446 vect_recog_temp_ssa_var (itype, NULL),
2447 unshare_expr (cond_expr),
2448 fold_convert (itype, then_clause),
2449 fold_convert (itype, else_clause));
2450 pattern_stmt
2451 = gimple_build_assign_with_ops (NOP_EXPR,
2452 vect_recog_temp_ssa_var (type, NULL),
2453 gimple_assign_lhs (def_stmt), NULL_TREE);
2455 new_pattern_def_seq (stmt_vinfo, def_stmt);
2456 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2457 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2458 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2459 *type_in = vecitype;
2460 *type_out = vectype;
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_NOTE, vect_location,
2464 "vect_recog_mixed_size_cond_pattern: detected:\n");
2466 return pattern_stmt;
2470 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
2471 true if bool VAR can be optimized that way. */
2473 static bool
2474 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2476 gimple def_stmt;
2477 enum vect_def_type dt;
2478 tree def, rhs1;
2479 enum tree_code rhs_code;
2481 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2482 &dt))
2483 return false;
2485 if (dt != vect_internal_def)
2486 return false;
2488 if (!is_gimple_assign (def_stmt))
2489 return false;
2491 if (!has_single_use (def))
2492 return false;
2494 rhs1 = gimple_assign_rhs1 (def_stmt);
2495 rhs_code = gimple_assign_rhs_code (def_stmt);
2496 switch (rhs_code)
2498 case SSA_NAME:
2499 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2501 CASE_CONVERT:
2502 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2503 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2504 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2505 return false;
2506 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2508 case BIT_NOT_EXPR:
2509 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2511 case BIT_AND_EXPR:
2512 case BIT_IOR_EXPR:
2513 case BIT_XOR_EXPR:
2514 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2515 return false;
2516 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2517 bb_vinfo);
2519 default:
2520 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2522 tree vecitype, comp_vectype;
2524 /* If the comparison can throw, then is_gimple_condexpr will be
2525 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2526 if (stmt_could_throw_p (def_stmt))
2527 return false;
2529 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2530 if (comp_vectype == NULL_TREE)
2531 return false;
2533 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2535 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2536 tree itype
2537 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2538 vecitype = get_vectype_for_scalar_type (itype);
2539 if (vecitype == NULL_TREE)
2540 return false;
2542 else
2543 vecitype = comp_vectype;
2544 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2546 return false;
2551 /* Helper function of adjust_bool_pattern. Cast the result of a previous
2552 stmt (SSA_NAME_DEF_STMT of VAR) to TYPE, by moving the COND_EXPR from
2553 RELATED_STMT to PATTERN_DEF_SEQ and adding the cast as the new RELATED_STMT. */
2555 static tree
2556 adjust_bool_pattern_cast (tree type, tree var)
2558 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2559 gimple cast_stmt, pattern_stmt;
2561 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2562 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2563 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2564 cast_stmt
2565 = gimple_build_assign_with_ops (NOP_EXPR,
2566 vect_recog_temp_ssa_var (type, NULL),
2567 gimple_assign_lhs (pattern_stmt),
2568 NULL_TREE);
2569 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2570 return gimple_assign_lhs (cast_stmt);
2574 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2575 recursively. VAR is an SSA_NAME that should be transformed from bool
2576 to a wider integer type; OUT_TYPE is the desired final integer type of
2577 the whole pattern; TRUEVAL should be NULL unless we are optimizing a
2578 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2579 in the then_clause; STMTS is where statements with added pattern stmts
2580 should be pushed. */
2582 static tree
2583 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2584 vec<gimple> *stmts)
2586 gimple stmt = SSA_NAME_DEF_STMT (var);
2587 enum tree_code rhs_code, def_rhs_code;
2588 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2589 location_t loc;
2590 gimple pattern_stmt, def_stmt;
2592 rhs1 = gimple_assign_rhs1 (stmt);
2593 rhs2 = gimple_assign_rhs2 (stmt);
2594 rhs_code = gimple_assign_rhs_code (stmt);
2595 loc = gimple_location (stmt);
2596 switch (rhs_code)
2598 case SSA_NAME:
2599 CASE_CONVERT:
2600 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2601 itype = TREE_TYPE (irhs1);
2602 pattern_stmt
2603 = gimple_build_assign_with_ops (SSA_NAME,
2604 vect_recog_temp_ssa_var (itype, NULL),
2605 irhs1, NULL_TREE);
2606 break;
2608 case BIT_NOT_EXPR:
2609 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2610 itype = TREE_TYPE (irhs1);
2611 pattern_stmt
2612 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2613 vect_recog_temp_ssa_var (itype, NULL),
2614 irhs1, build_int_cst (itype, 1));
2615 break;
2617 case BIT_AND_EXPR:
2618 /* Try to optimize x = y & (a < b ? 1 : 0); into
2619 x = (a < b ? y : 0);
2621 E.g. for:
2622 bool a_b, b_b, c_b;
2623 TYPE d_T;
2625 S1 a_b = x1 CMP1 y1;
2626 S2 b_b = x2 CMP2 y2;
2627 S3 c_b = a_b & b_b;
2628 S4 d_T = (TYPE) c_b;
2630 we would normally emit:
2632 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2633 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2634 S3' c_T = a_T & b_T;
2635 S4' d_T = c_T;
2637 but we can save one stmt by using the
2638 result of one of the COND_EXPRs in the other COND_EXPR and leaving
2639 the BIT_AND_EXPR stmt out:
2641 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2642 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2643 S4' d_T = c_T;
2645 At least when VEC_COND_EXPR is implemented using masks,
2646 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2647 computes the comparison mask and ANDs it, in one case with an
2648 all-ones vector, in the other case with a vector register.
2649 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2650 often more expensive. */
2651 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2652 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2653 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2655 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2656 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2657 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2658 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2660 gimple tstmt;
2661 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2662 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2663 tstmt = stmts->pop ();
2664 gcc_assert (tstmt == def_stmt);
2665 stmts->quick_push (stmt);
2666 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2667 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2668 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2669 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2670 return irhs2;
2672 else
2673 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2674 goto and_ior_xor;
2676 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2677 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2678 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2680 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2681 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2682 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2683 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2685 gimple tstmt;
2686 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2687 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2688 tstmt = stmts->pop ();
2689 gcc_assert (tstmt == def_stmt);
2690 stmts->quick_push (stmt);
2691 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2692 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2693 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2694 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2695 return irhs1;
2697 else
2698 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2699 goto and_ior_xor;
2701 /* FALLTHRU */
2702 case BIT_IOR_EXPR:
2703 case BIT_XOR_EXPR:
2704 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2705 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2706 and_ior_xor:
2707 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2708 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2710 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2711 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2712 int out_prec = TYPE_PRECISION (out_type);
2713 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2714 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2715 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2716 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2717 else
2719 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2720 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2723 itype = TREE_TYPE (irhs1);
2724 pattern_stmt
2725 = gimple_build_assign_with_ops (rhs_code,
2726 vect_recog_temp_ssa_var (itype, NULL),
2727 irhs1, irhs2);
2728 break;
2730 default:
2731 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2732 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2733 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2734 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2735 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2737 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2738 itype
2739 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2741 else
2742 itype = TREE_TYPE (rhs1);
2743 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2744 if (trueval == NULL_TREE)
2745 trueval = build_int_cst (itype, 1);
2746 else
2747 gcc_checking_assert (useless_type_conversion_p (itype,
2748 TREE_TYPE (trueval)));
2749 pattern_stmt
2750 = gimple_build_assign_with_ops (COND_EXPR,
2751 vect_recog_temp_ssa_var (itype, NULL),
2752 cond_expr, trueval,
2753 build_int_cst (itype, 0));
2754 break;
2757 stmts->safe_push (stmt);
2758 gimple_set_location (pattern_stmt, loc);
2759 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2760 return gimple_assign_lhs (pattern_stmt);
2764 /* Function vect_recog_bool_pattern
2766 Try to find a pattern like the following:
2768 bool a_b, b_b, c_b, d_b, e_b;
2769 TYPE f_T;
2770 loop:
2771 S1 a_b = x1 CMP1 y1;
2772 S2 b_b = x2 CMP2 y2;
2773 S3 c_b = a_b & b_b;
2774 S4 d_b = x3 CMP3 y3;
2775 S5 e_b = c_b | d_b;
2776 S6 f_T = (TYPE) e_b;
2778 where type 'TYPE' is an integral type.
2780 Input:
2782 * LAST_STMT: A stmt at the end from which the pattern
2783 search begins, i.e. cast of a bool to
2784 an integer type.
2786 Output:
2788 * TYPE_IN: The type of the input arguments to the pattern.
2790 * TYPE_OUT: The type of the output of this pattern.
2792 * Return value: A new stmt that will be used to replace the pattern.
2794 Assuming the size of TYPE is the same as the size of all comparisons
2795 (otherwise some casts would be added where needed), for the above
2796 sequence we create the following related pattern stmts:
2797 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2798 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2799 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2800 S5' e_T = c_T | d_T;
2801 S6' f_T = e_T;
2803 Instead of the above S3' we could emit:
2804 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2805 S3' c_T = a_T & b_T;
2806 but the above is more efficient. */
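/* Source-level sketch of S1-S6 (illustrative; it assumes earlier
   passes have lowered && and || to BIT_AND_EXPR / BIT_IOR_EXPR on
   bools, which requires the operands to be free of side effects):

     for (i = 0; i < n; i++)
       f[i] = (x1[i] CMP1 y1[i] && x2[i] CMP2 y2[i]) || x3[i] CMP3 y3[i];

   where f is an array of 'TYPE'.  */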
2808 static gimple
2809 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2810 tree *type_out)
2812 gimple last_stmt = stmts->pop ();
2813 enum tree_code rhs_code;
2814 tree var, lhs, rhs, vectype;
2815 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2816 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2817 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2818 gimple pattern_stmt;
2820 if (!is_gimple_assign (last_stmt))
2821 return NULL;
2823 var = gimple_assign_rhs1 (last_stmt);
2824 lhs = gimple_assign_lhs (last_stmt);
2826 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2827 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2828 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2829 return NULL;
2831 rhs_code = gimple_assign_rhs_code (last_stmt);
2832 if (CONVERT_EXPR_CODE_P (rhs_code))
2834 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2835 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2836 return NULL;
2837 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2838 if (vectype == NULL_TREE)
2839 return NULL;
2841 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2842 return NULL;
2844 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2845 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2846 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2847 pattern_stmt
2848 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2849 else
2850 pattern_stmt
2851 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2852 *type_out = vectype;
2853 *type_in = vectype;
2854 stmts->safe_push (last_stmt);
2855 if (dump_enabled_p ())
2856 dump_printf_loc (MSG_NOTE, vect_location,
2857 "vect_recog_bool_pattern: detected:\n");
2859 return pattern_stmt;
2861 else if (rhs_code == SSA_NAME
2862 && STMT_VINFO_DATA_REF (stmt_vinfo))
2864 stmt_vec_info pattern_stmt_info;
2865 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2866 gcc_assert (vectype != NULL_TREE);
2867 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2868 return NULL;
2869 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2870 return NULL;
2872 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2873 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2874 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2876 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2877 gimple cast_stmt
2878 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2879 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2880 rhs = rhs2;
2882 pattern_stmt
2883 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2884 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2885 bb_vinfo);
2886 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2887 STMT_VINFO_DATA_REF (pattern_stmt_info)
2888 = STMT_VINFO_DATA_REF (stmt_vinfo);
2889 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2890 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2891 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2892 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2893 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2894 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2895 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2896 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2897 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2898 *type_out = vectype;
2899 *type_in = vectype;
2900 stmts->safe_push (last_stmt);
2901 if (dump_enabled_p ())
2902 dump_printf_loc (MSG_NOTE, vect_location,
2903 "vect_recog_bool_pattern: detected:\n");
2904 return pattern_stmt;
2906 else
2907 return NULL;
2911 /* Mark statements that are involved in a pattern. */
2913 static inline void
2914 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2915 tree pattern_vectype)
2917 stmt_vec_info pattern_stmt_info, def_stmt_info;
2918 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2919 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2920 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2921 gimple def_stmt;
2923 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2924 if (pattern_stmt_info == NULL)
2926 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2927 bb_vinfo);
2928 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2930 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2932 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2933 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2934 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2935 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2936 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2937 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2938 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2939 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2940 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2942 gimple_stmt_iterator si;
2943 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2944 !gsi_end_p (si); gsi_next (&si))
2946 def_stmt = gsi_stmt (si);
2947 def_stmt_info = vinfo_for_stmt (def_stmt);
2948 if (def_stmt_info == NULL)
2950 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2951 bb_vinfo);
2952 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2954 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2955 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2956 STMT_VINFO_DEF_TYPE (def_stmt_info)
2957 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2958 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2959 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2964 /* Function vect_pattern_recog_1
2966 Input:
2967 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2968 computation pattern.
2969 STMT: A stmt from which the pattern search should start.
2971 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2972 expression that computes the same functionality and can be used to
2973 replace the sequence of stmts that are involved in the pattern.
2975 Output:
2976 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2977 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2978 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2979 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2980 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2981 the available target pattern.
2983 This function also does some bookkeeping, as explained in the documentation
2984 for vect_pattern_recog. */
2986 static void
2987 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2988 gimple_stmt_iterator si,
2989 vec<gimple> *stmts_to_replace)
2991 gimple stmt = gsi_stmt (si), pattern_stmt;
2992 stmt_vec_info stmt_info;
2993 loop_vec_info loop_vinfo;
2994 tree pattern_vectype;
2995 tree type_in, type_out;
2996 enum tree_code code;
2997 int i;
2998 gimple next;
3000 stmts_to_replace->truncate (0);
3001 stmts_to_replace->quick_push (stmt);
3002 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3003 if (!pattern_stmt)
3004 return;
3006 stmt = stmts_to_replace->last ();
3007 stmt_info = vinfo_for_stmt (stmt);
3008 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3010 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3012 /* No need to check target support (already checked by the pattern
3013 recognition function). */
3014 pattern_vectype = type_out ? type_out : type_in;
3016 else
3018 enum machine_mode vec_mode;
3019 enum insn_code icode;
3020 optab optab;
3022 /* Check target support */
3023 type_in = get_vectype_for_scalar_type (type_in);
3024 if (!type_in)
3025 return;
3026 if (type_out)
3027 type_out = get_vectype_for_scalar_type (type_out);
3028 else
3029 type_out = type_in;
3030 if (!type_out)
3031 return;
3032 pattern_vectype = type_out;
3034 if (is_gimple_assign (pattern_stmt))
3035 code = gimple_assign_rhs_code (pattern_stmt);
3036 else
3038 gcc_assert (is_gimple_call (pattern_stmt));
3039 code = CALL_EXPR;
3042 optab = optab_for_tree_code (code, type_in, optab_default);
3043 vec_mode = TYPE_MODE (type_in);
3044 if (!optab
3045 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3046 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3047 return;
3050 /* Found a vectorizable pattern. */
3051 if (dump_enabled_p ())
3053 dump_printf_loc (MSG_NOTE, vect_location,
3054 "pattern recognized: ");
3055 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3056 dump_printf (MSG_NOTE, "\n");
3059 /* Mark the stmts that are involved in the pattern. */
3060 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3062 /* Patterns cannot be vectorized using SLP, because they change the order of
3063 computation. */
3064 if (loop_vinfo)
3065 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3066 if (next == stmt)
3067 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3069 /* It is possible that additional pattern stmts are created and inserted in
3070 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3071 relevant statements. */
3072 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3073 && (unsigned) i < (stmts_to_replace->length () - 1);
3074 i++)
3076 stmt_info = vinfo_for_stmt (stmt);
3077 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3078 if (dump_enabled_p ())
3080 dump_printf_loc (MSG_NOTE, vect_location,
3081 "additional pattern stmt: ");
3082 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3083 dump_printf (MSG_NOTE, "\n");
3086 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3091 /* Function vect_pattern_recog
3093 Input:
3094 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
3095 computation idioms.
3097 Output - for each computation idiom that is detected we create a new stmt
3098 that provides the same functionality and that can be vectorized. We
3099 also record some information in the stmt_vec_info of the relevant
3100 stmts, as explained below:
3102 At the entry to this function we have the following stmts, with the
3103 following initial value in the STMT_VINFO fields:
3105 stmt in_pattern_p related_stmt vec_stmt
3106 S1: a_i = .... - - -
3107 S2: a_2 = ..use(a_i).. - - -
3108 S3: a_1 = ..use(a_2).. - - -
3109 S4: a_0 = ..use(a_1).. - - -
3110 S5: ... = ..use(a_0).. - - -
3112 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3113 represented by a single stmt. We then:
3114 - create a new stmt S6 equivalent to the pattern (the stmt is not
3115 inserted into the code)
3116 - fill in the STMT_VINFO fields as follows:
3118 in_pattern_p related_stmt vec_stmt
3119 S1: a_i = .... - - -
3120 S2: a_2 = ..use(a_i).. - - -
3121 S3: a_1 = ..use(a_2).. - - -
3122 S4: a_0 = ..use(a_1).. true S6 -
3123 '---> S6: a_new = .... - S4 -
3124 S5: ... = ..use(a_0).. - - -
3126 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3127 to each other through the RELATED_STMT field).
3129 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3130 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3131 remain irrelevant unless used by stmts other than S4.
3133 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3134 (because they are marked as irrelevant). It will vectorize S6, and record
3135 a pointer to the new vector stmt VS6 from S6 (as usual).
3136 S4 will be skipped, and S5 will be vectorized as usual:
3138 in_pattern_p related_stmt vec_stmt
3139 S1: a_i = .... - - -
3140 S2: a_2 = ..use(a_i).. - - -
3141 S3: a_1 = ..use(a_2).. - - -
3142 > VS6: va_new = .... - - -
3143 S4: a_0 = ..use(a_1).. true S6 VS6
3144 '---> S6: a_new = .... - S4 VS6
3145 > VS5: ... = ..vuse(va_new).. - - -
3146 S5: ... = ..use(a_0).. - - -
3148 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3149 elsewhere), and we'll end up with:
3151 VS6: va_new = ....
3152 VS5: ... = ..vuse(va_new)..
3154 In case of more than one pattern statement, e.g., widen-mult with an
3155 intermediate type:
3157 S1 a_t = ;
3158 S2 a_T = (TYPE) a_t;
3159 '--> S3: a_it = (interm_type) a_t;
3160 S4 prod_T = a_T * CONST;
3161 '--> S5: prod_T' = a_it w* CONST;
3163 there may be other users of a_T outside the pattern. In that case S2 will
3164 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3165 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3166 be recorded in S3. */
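/* For instance (an illustrative sketch), in

     short a[N], b[N];  int sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i] * b[i];

   vect_recog_dot_prod_pattern can replace the widening
   multiply-and-accumulate sequence with a single DOT_PROD_EXPR
   pattern stmt, which may then map to a target dot-product
   instruction.  */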
3168 void
3169 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3171 struct loop *loop;
3172 basic_block *bbs;
3173 unsigned int nbbs;
3174 gimple_stmt_iterator si;
3175 unsigned int i, j;
3176 vect_recog_func_ptr vect_recog_func;
3177 vec<gimple> stmts_to_replace;
3178 stmts_to_replace.create (1);
3179 gimple stmt;
3181 if (dump_enabled_p ())
3182 dump_printf_loc (MSG_NOTE, vect_location,
3183 "=== vect_pattern_recog ===\n");
3185 if (loop_vinfo)
3187 loop = LOOP_VINFO_LOOP (loop_vinfo);
3188 bbs = LOOP_VINFO_BBS (loop_vinfo);
3189 nbbs = loop->num_nodes;
3191 else
3193 bbs = &BB_VINFO_BB (bb_vinfo);
3194 nbbs = 1;
3197 /* Scan through the loop stmts, applying the pattern recognition
3198 functions starting at each stmt visited: */
3199 for (i = 0; i < nbbs; i++)
3201 basic_block bb = bbs[i];
3202 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3204 if (bb_vinfo && (stmt = gsi_stmt (si))
3205 && vinfo_for_stmt (stmt)
3206 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3207 continue;
3209 /* Scan over all generic vect_recog_xxx_pattern functions. */
3210 for (j = 0; j < NUM_PATTERNS; j++)
3212 vect_recog_func = vect_vect_recog_func_ptrs[j];
3213 vect_pattern_recog_1 (vect_recog_func, si,
3214 &stmts_to_replace);
3219 stmts_to_replace.release ();