PR libstdc++/56278
[official-gcc.git] / gcc / tree-vect-patterns.c
blobe8275d9d2a4692deee6bca0d7f758d121246205f
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "basic-block.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-flow.h"
31 #include "cfgloop.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "params.h"
35 #include "tree-data-ref.h"
36 #include "tree-vectorizer.h"
37 #include "recog.h" /* FIXME: for insn_data */
38 #include "diagnostic-core.h"
39 #include "dumpfile.h"
41 /* Pattern recognition functions */
42 static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
43 tree *);
44 static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
45 tree *);
46 static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
47 tree *);
48 static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
49 static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
50 tree *);
51 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
52 tree *, tree *);
53 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
54 tree *, tree *);
55 static gimple vect_recog_divmod_pattern (vec<gimple> *,
56 tree *, tree *);
57 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
58 tree *, tree *);
59 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
60 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
61 vect_recog_widen_mult_pattern,
62 vect_recog_widen_sum_pattern,
63 vect_recog_dot_prod_pattern,
64 vect_recog_pow_pattern,
65 vect_recog_widen_shift_pattern,
66 vect_recog_over_widening_pattern,
67 vect_recog_vector_vector_shift_pattern,
68 vect_recog_divmod_pattern,
69 vect_recog_mixed_size_cond_pattern,
70 vect_recog_bool_pattern};
72 static inline void
73 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
75 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
76 stmt);
79 static inline void
80 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
82 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
83 append_pattern_def_seq (stmt_info, stmt);
86 /* Check whether STMT2 is in the same loop or basic block as STMT1.
87 Which of the two applies depends on whether we're currently doing
88 loop-based or basic-block-based vectorization, as determined by
89 the vinfo_for_stmt for STMT1 (which must be defined).
91 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
92 to be defined as well. */
94 static bool
95 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
97 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
98 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
99 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
101 if (!gimple_bb (stmt2))
102 return false;
104 if (loop_vinfo)
106 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
107 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
108 return false;
110 else
112 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
113 || gimple_code (stmt2) == GIMPLE_PHI)
114 return false;
117 gcc_assert (vinfo_for_stmt (stmt2));
118 return true;
121 /* If the LHS of DEF_STMT has a single use, and that statement is
122 in the same loop or basic block, return it. */
124 static gimple
125 vect_single_imm_use (gimple def_stmt)
127 tree lhs = gimple_assign_lhs (def_stmt);
128 use_operand_p use_p;
129 gimple use_stmt;
131 if (!single_imm_use (lhs, &use_p, &use_stmt))
132 return NULL;
134 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
135 return NULL;
137 return use_stmt;
140 /* Check whether NAME, an ssa-name used in USE_STMT,
141 is a result of a type promotion or demotion, such that:
142 DEF_STMT: NAME = NOP (name0)
143 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
144 If CHECK_SIGN is TRUE, check that either both types are signed or both are
145 unsigned. */
147 static bool
148 type_conversion_p (tree name, gimple use_stmt, bool check_sign,
149 tree *orig_type, gimple *def_stmt, bool *promotion)
151 tree dummy;
152 gimple dummy_gimple;
153 loop_vec_info loop_vinfo;
154 stmt_vec_info stmt_vinfo;
155 tree type = TREE_TYPE (name);
156 tree oprnd0;
157 enum vect_def_type dt;
158 tree def;
159 bb_vec_info bb_vinfo;
161 stmt_vinfo = vinfo_for_stmt (use_stmt);
162 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
163 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
164 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
165 &def, &dt))
166 return false;
168 if (dt != vect_internal_def
169 && dt != vect_external_def && dt != vect_constant_def)
170 return false;
172 if (!*def_stmt)
173 return false;
175 if (!is_gimple_assign (*def_stmt))
176 return false;
178 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
179 return false;
181 oprnd0 = gimple_assign_rhs1 (*def_stmt);
183 *orig_type = TREE_TYPE (oprnd0);
184 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
185 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
186 return false;
188 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
189 *promotion = true;
190 else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
191 *promotion = false;
192 else
193 return false;
195 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
196 bb_vinfo, &dummy_gimple, &dummy, &dt))
197 return false;
199 return true;
202 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
203 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
205 static tree
206 vect_recog_temp_ssa_var (tree type, gimple stmt)
208 return make_temp_ssa_name (type, stmt, "patt");
211 /* Function vect_recog_dot_prod_pattern
213 Try to find the following pattern:
215 type x_t, y_t;
216 TYPE1 prod;
217 TYPE2 sum = init;
218 loop:
219 sum_0 = phi <init, sum_1>
220 S1 x_t = ...
221 S2 y_t = ...
222 S3 x_T = (TYPE1) x_t;
223 S4 y_T = (TYPE1) y_t;
224 S5 prod = x_T * y_T;
225 [S6 prod = (TYPE2) prod; #optional]
226 S7 sum_1 = prod + sum_0;
228 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
229 same size of 'TYPE1' or bigger. This is a special case of a reduction
230 computation.
232 Input:
234 * STMTS: Contains a stmt from which the pattern search begins. In the
235 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
236 will be detected.
238 Output:
240 * TYPE_IN: The type of the input arguments to the pattern.
242 * TYPE_OUT: The type of the output of this pattern.
244 * Return value: A new stmt that will be used to replace the sequence of
245 stmts that constitute the pattern. In this case it will be:
246 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
248 Note: The dot-prod idiom is a widening reduction pattern that is
249 vectorized without preserving all the intermediate results. It
250 produces only N/2 (widened) results (by summing up pairs of
251 intermediate results) rather than all N results. Therefore, we
252 cannot allow this pattern when we want to get all the results and in
253 the correct order (as is the case when this computation is in an
254 inner-loop nested in an outer-loop that us being vectorized). */
256 static gimple
257 vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
258 tree *type_out)
260 gimple stmt, last_stmt = (*stmts)[0];
261 tree oprnd0, oprnd1;
262 tree oprnd00, oprnd01;
263 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
264 tree type, half_type;
265 gimple pattern_stmt;
266 tree prod_type;
267 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
268 struct loop *loop;
269 tree var;
270 bool promotion;
272 if (!loop_info)
273 return NULL;
275 loop = LOOP_VINFO_LOOP (loop_info);
277 if (!is_gimple_assign (last_stmt))
278 return NULL;
280 type = gimple_expr_type (last_stmt);
282 /* Look for the following pattern
283 DX = (TYPE1) X;
284 DY = (TYPE1) Y;
285 DPROD = DX * DY;
286 DDPROD = (TYPE2) DPROD;
287 sum_1 = DDPROD + sum_0;
288 In which
289 - DX is double the size of X
290 - DY is double the size of Y
291 - DX, DY, DPROD all have the same type
292 - sum is the same size of DPROD or bigger
293 - sum has been recognized as a reduction variable.
295 This is equivalent to:
296 DPROD = X w* Y; #widen mult
297 sum_1 = DPROD w+ sum_0; #widen summation
299 DPROD = X w* Y; #widen mult
300 sum_1 = DPROD + sum_0; #summation
303 /* Starting from LAST_STMT, follow the defs of its uses in search
304 of the above pattern. */
306 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
307 return NULL;
309 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
311 /* Has been detected as widening-summation? */
313 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
314 type = gimple_expr_type (stmt);
315 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
316 return NULL;
317 oprnd0 = gimple_assign_rhs1 (stmt);
318 oprnd1 = gimple_assign_rhs2 (stmt);
319 half_type = TREE_TYPE (oprnd0);
321 else
323 gimple def_stmt;
325 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
326 return NULL;
327 oprnd0 = gimple_assign_rhs1 (last_stmt);
328 oprnd1 = gimple_assign_rhs2 (last_stmt);
329 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
330 || !types_compatible_p (TREE_TYPE (oprnd1), type))
331 return NULL;
332 stmt = last_stmt;
334 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
335 &promotion)
336 && promotion)
338 stmt = def_stmt;
339 oprnd0 = gimple_assign_rhs1 (stmt);
341 else
342 half_type = type;
345 /* So far so good. Since last_stmt was detected as a (summation) reduction,
346 we know that oprnd1 is the reduction variable (defined by a loop-header
347 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
348 Left to check that oprnd0 is defined by a (widen_)mult_expr */
349 if (TREE_CODE (oprnd0) != SSA_NAME)
350 return NULL;
352 prod_type = half_type;
353 stmt = SSA_NAME_DEF_STMT (oprnd0);
355 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
356 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
357 return NULL;
359 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
360 inside the loop (in case we are analyzing an outer-loop). */
361 if (!is_gimple_assign (stmt))
362 return NULL;
363 stmt_vinfo = vinfo_for_stmt (stmt);
364 gcc_assert (stmt_vinfo);
365 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
366 return NULL;
367 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
368 return NULL;
369 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
371 /* Has been detected as a widening multiplication? */
373 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
374 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
375 return NULL;
376 stmt_vinfo = vinfo_for_stmt (stmt);
377 gcc_assert (stmt_vinfo);
378 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
379 oprnd00 = gimple_assign_rhs1 (stmt);
380 oprnd01 = gimple_assign_rhs2 (stmt);
382 else
384 tree half_type0, half_type1;
385 gimple def_stmt;
386 tree oprnd0, oprnd1;
388 oprnd0 = gimple_assign_rhs1 (stmt);
389 oprnd1 = gimple_assign_rhs2 (stmt);
390 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
391 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
392 return NULL;
393 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
394 &promotion)
395 || !promotion)
396 return NULL;
397 oprnd00 = gimple_assign_rhs1 (def_stmt);
398 if (!type_conversion_p (oprnd0, stmt, true, &half_type1, &def_stmt,
399 &promotion)
400 || !promotion)
401 return NULL;
402 oprnd01 = gimple_assign_rhs1 (def_stmt);
403 if (!types_compatible_p (half_type0, half_type1))
404 return NULL;
405 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
406 return NULL;
409 half_type = TREE_TYPE (oprnd00);
410 *type_in = half_type;
411 *type_out = type;
413 /* Pattern detected. Create a stmt to be used to replace the pattern: */
414 var = vect_recog_temp_ssa_var (type, NULL);
415 pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
416 oprnd00, oprnd01, oprnd1);
418 if (dump_enabled_p ())
420 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
421 "vect_recog_dot_prod_pattern: detected: ");
422 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
425 /* We don't allow changing the order of the computation in the inner-loop
426 when doing outer-loop vectorization. */
427 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
429 return pattern_stmt;
433 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
434 and LSHIFT_EXPR.
436 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
437 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
439 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
440 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
441 that satisfies the above restrictions, we can perform a widening opeartion
442 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
443 with a_it = (interm_type) a_t; */
445 static bool
446 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
447 tree const_oprnd, tree *oprnd,
448 vec<gimple> *stmts, tree type,
449 tree *half_type, gimple def_stmt)
451 tree new_type, new_oprnd;
452 gimple new_stmt;
454 if (code != MULT_EXPR && code != LSHIFT_EXPR)
455 return false;
457 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
458 || (code == LSHIFT_EXPR
459 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
460 != 1))
461 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
463 /* CONST_OPRND is a constant of HALF_TYPE. */
464 *oprnd = gimple_assign_rhs1 (def_stmt);
465 return true;
468 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
469 return false;
471 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
472 return false;
474 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
475 a type 2 times bigger than HALF_TYPE. */
476 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
477 TYPE_UNSIGNED (type));
478 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
479 || (code == LSHIFT_EXPR
480 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
481 return false;
483 /* Use NEW_TYPE for widening operation. */
484 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
486 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
487 /* Check if the already created pattern stmt is what we need. */
488 if (!is_gimple_assign (new_stmt)
489 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
490 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
491 return false;
493 stmts->safe_push (def_stmt);
494 *oprnd = gimple_assign_lhs (new_stmt);
496 else
498 /* Create a_T = (NEW_TYPE) a_t; */
499 *oprnd = gimple_assign_rhs1 (def_stmt);
500 new_oprnd = make_ssa_name (new_type, NULL);
501 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
502 NULL_TREE);
503 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
504 stmts->safe_push (def_stmt);
505 *oprnd = new_oprnd;
508 *half_type = new_type;
509 return true;
513 /* Function vect_recog_widen_mult_pattern
515 Try to find the following pattern:
517 type a_t, b_t;
518 TYPE a_T, b_T, prod_T;
520 S1 a_t = ;
521 S2 b_t = ;
522 S3 a_T = (TYPE) a_t;
523 S4 b_T = (TYPE) b_t;
524 S5 prod_T = a_T * b_T;
526 where type 'TYPE' is at least double the size of type 'type'.
528 Also detect unsigned cases:
530 unsigned type a_t, b_t;
531 unsigned TYPE u_prod_T;
532 TYPE a_T, b_T, prod_T;
534 S1 a_t = ;
535 S2 b_t = ;
536 S3 a_T = (TYPE) a_t;
537 S4 b_T = (TYPE) b_t;
538 S5 prod_T = a_T * b_T;
539 S6 u_prod_T = (unsigned TYPE) prod_T;
541 and multiplication by constants:
543 type a_t;
544 TYPE a_T, prod_T;
546 S1 a_t = ;
547 S3 a_T = (TYPE) a_t;
548 S5 prod_T = a_T * CONST;
550 A special case of multiplication by constants is when 'TYPE' is 4 times
551 bigger than 'type', but CONST fits an intermediate type 2 times smaller
552 than 'TYPE'. In that case we create an additional pattern stmt for S3
553 to create a variable of the intermediate type, and perform widen-mult
554 on the intermediate type as well:
556 type a_t;
557 interm_type a_it;
558 TYPE a_T, prod_T, prod_T';
560 S1 a_t = ;
561 S3 a_T = (TYPE) a_t;
562 '--> a_it = (interm_type) a_t;
563 S5 prod_T = a_T * CONST;
564 '--> prod_T' = a_it w* CONST;
566 Input/Output:
568 * STMTS: Contains a stmt from which the pattern search begins. In the
569 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
570 is detected. In case of unsigned widen-mult, the original stmt (S5) is
571 replaced with S6 in STMTS. In case of multiplication by a constant
572 of an intermediate type (the last case above), STMTS also contains S3
573 (inserted before S5).
575 Output:
577 * TYPE_IN: The type of the input arguments to the pattern.
579 * TYPE_OUT: The type of the output of this pattern.
581 * Return value: A new stmt that will be used to replace the sequence of
582 stmts that constitute the pattern. In this case it will be:
583 WIDEN_MULT <a_t, b_t>
586 static gimple
587 vect_recog_widen_mult_pattern (vec<gimple> *stmts,
588 tree *type_in, tree *type_out)
590 gimple last_stmt = stmts->pop ();
591 gimple def_stmt0, def_stmt1;
592 tree oprnd0, oprnd1;
593 tree type, half_type0, half_type1;
594 gimple pattern_stmt;
595 tree vectype, vectype_out = NULL_TREE;
596 tree var;
597 enum tree_code dummy_code;
598 int dummy_int;
599 vec<tree> dummy_vec;
600 bool op1_ok;
601 bool promotion;
603 if (!is_gimple_assign (last_stmt))
604 return NULL;
606 type = gimple_expr_type (last_stmt);
608 /* Starting from LAST_STMT, follow the defs of its uses in search
609 of the above pattern. */
611 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
612 return NULL;
614 oprnd0 = gimple_assign_rhs1 (last_stmt);
615 oprnd1 = gimple_assign_rhs2 (last_stmt);
616 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
617 || !types_compatible_p (TREE_TYPE (oprnd1), type))
618 return NULL;
620 /* Check argument 0. */
621 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
622 &promotion)
623 || !promotion)
624 return NULL;
625 /* Check argument 1. */
626 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
627 &def_stmt1, &promotion);
629 if (op1_ok && promotion)
631 oprnd0 = gimple_assign_rhs1 (def_stmt0);
632 oprnd1 = gimple_assign_rhs1 (def_stmt1);
634 else
636 if (TREE_CODE (oprnd1) == INTEGER_CST
637 && TREE_CODE (half_type0) == INTEGER_TYPE
638 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
639 &oprnd0, stmts, type,
640 &half_type0, def_stmt0))
641 half_type1 = half_type0;
642 else
643 return NULL;
646 /* Handle unsigned case. Look for
647 S6 u_prod_T = (unsigned TYPE) prod_T;
648 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
649 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
651 gimple use_stmt;
652 tree use_lhs;
653 tree use_type;
655 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
656 return NULL;
658 use_stmt = vect_single_imm_use (last_stmt);
659 if (!use_stmt || !is_gimple_assign (use_stmt)
660 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
661 return NULL;
663 use_lhs = gimple_assign_lhs (use_stmt);
664 use_type = TREE_TYPE (use_lhs);
665 if (!INTEGRAL_TYPE_P (use_type)
666 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
667 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
668 return NULL;
670 type = use_type;
671 last_stmt = use_stmt;
674 if (!types_compatible_p (half_type0, half_type1))
675 return NULL;
677 /* Pattern detected. */
678 if (dump_enabled_p ())
679 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
680 "vect_recog_widen_mult_pattern: detected: ");
682 /* Check target support */
683 vectype = get_vectype_for_scalar_type (half_type0);
684 vectype_out = get_vectype_for_scalar_type (type);
685 if (!vectype
686 || !vectype_out
687 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
688 vectype_out, vectype,
689 &dummy_code, &dummy_code,
690 &dummy_int, &dummy_vec))
691 return NULL;
693 *type_in = vectype;
694 *type_out = vectype_out;
696 /* Pattern supported. Create a stmt to be used to replace the pattern: */
697 var = vect_recog_temp_ssa_var (type, NULL);
698 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
699 oprnd1);
701 if (dump_enabled_p ())
702 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
704 stmts->safe_push (last_stmt);
705 return pattern_stmt;
709 /* Function vect_recog_pow_pattern
711 Try to find the following pattern:
713 x = POW (y, N);
715 with POW being one of pow, powf, powi, powif and N being
716 either 2 or 0.5.
718 Input:
720 * LAST_STMT: A stmt from which the pattern search begins.
722 Output:
724 * TYPE_IN: The type of the input arguments to the pattern.
726 * TYPE_OUT: The type of the output of this pattern.
728 * Return value: A new stmt that will be used to replace the sequence of
729 stmts that constitute the pattern. In this case it will be:
730 x = x * x
732 x = sqrt (x)
735 static gimple
736 vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
737 tree *type_out)
739 gimple last_stmt = (*stmts)[0];
740 tree fn, base, exp = NULL;
741 gimple stmt;
742 tree var;
744 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
745 return NULL;
747 fn = gimple_call_fndecl (last_stmt);
748 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
749 return NULL;
751 switch (DECL_FUNCTION_CODE (fn))
753 case BUILT_IN_POWIF:
754 case BUILT_IN_POWI:
755 case BUILT_IN_POWF:
756 case BUILT_IN_POW:
757 base = gimple_call_arg (last_stmt, 0);
758 exp = gimple_call_arg (last_stmt, 1);
759 if (TREE_CODE (exp) != REAL_CST
760 && TREE_CODE (exp) != INTEGER_CST)
761 return NULL;
762 break;
764 default:
765 return NULL;
768 /* We now have a pow or powi builtin function call with a constant
769 exponent. */
771 *type_out = NULL_TREE;
773 /* Catch squaring. */
774 if ((host_integerp (exp, 0)
775 && tree_low_cst (exp, 0) == 2)
776 || (TREE_CODE (exp) == REAL_CST
777 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
779 *type_in = TREE_TYPE (base);
781 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
782 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
783 return stmt;
786 /* Catch square root. */
787 if (TREE_CODE (exp) == REAL_CST
788 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
790 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
791 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
792 if (*type_in)
794 gimple stmt = gimple_build_call (newfn, 1, base);
795 if (vectorizable_function (stmt, *type_in, *type_in)
796 != NULL_TREE)
798 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
799 gimple_call_set_lhs (stmt, var);
800 return stmt;
805 return NULL;
809 /* Function vect_recog_widen_sum_pattern
811 Try to find the following pattern:
813 type x_t;
814 TYPE x_T, sum = init;
815 loop:
816 sum_0 = phi <init, sum_1>
817 S1 x_t = *p;
818 S2 x_T = (TYPE) x_t;
819 S3 sum_1 = x_T + sum_0;
821 where type 'TYPE' is at least double the size of type 'type', i.e - we're
822 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
823 a special case of a reduction computation.
825 Input:
827 * LAST_STMT: A stmt from which the pattern search begins. In the example,
828 when this function is called with S3, the pattern {S2,S3} will be detected.
830 Output:
832 * TYPE_IN: The type of the input arguments to the pattern.
834 * TYPE_OUT: The type of the output of this pattern.
836 * Return value: A new stmt that will be used to replace the sequence of
837 stmts that constitute the pattern. In this case it will be:
838 WIDEN_SUM <x_t, sum_0>
840 Note: The widening-sum idiom is a widening reduction pattern that is
841 vectorized without preserving all the intermediate results. It
842 produces only N/2 (widened) results (by summing up pairs of
843 intermediate results) rather than all N results. Therefore, we
844 cannot allow this pattern when we want to get all the results and in
845 the correct order (as is the case when this computation is in an
846 inner-loop nested in an outer-loop that us being vectorized). */
848 static gimple
849 vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
850 tree *type_out)
852 gimple stmt, last_stmt = (*stmts)[0];
853 tree oprnd0, oprnd1;
854 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
855 tree type, half_type;
856 gimple pattern_stmt;
857 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
858 struct loop *loop;
859 tree var;
860 bool promotion;
862 if (!loop_info)
863 return NULL;
865 loop = LOOP_VINFO_LOOP (loop_info);
867 if (!is_gimple_assign (last_stmt))
868 return NULL;
870 type = gimple_expr_type (last_stmt);
872 /* Look for the following pattern
873 DX = (TYPE) X;
874 sum_1 = DX + sum_0;
875 In which DX is at least double the size of X, and sum_1 has been
876 recognized as a reduction variable.
879 /* Starting from LAST_STMT, follow the defs of its uses in search
880 of the above pattern. */
882 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
883 return NULL;
885 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
886 return NULL;
888 oprnd0 = gimple_assign_rhs1 (last_stmt);
889 oprnd1 = gimple_assign_rhs2 (last_stmt);
890 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
891 || !types_compatible_p (TREE_TYPE (oprnd1), type))
892 return NULL;
894 /* So far so good. Since last_stmt was detected as a (summation) reduction,
895 we know that oprnd1 is the reduction variable (defined by a loop-header
896 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
897 Left to check that oprnd0 is defined by a cast from type 'type' to type
898 'TYPE'. */
900 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
901 &promotion)
902 || !promotion)
903 return NULL;
905 oprnd0 = gimple_assign_rhs1 (stmt);
906 *type_in = half_type;
907 *type_out = type;
909 /* Pattern detected. Create a stmt to be used to replace the pattern: */
910 var = vect_recog_temp_ssa_var (type, NULL);
911 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
912 oprnd0, oprnd1);
914 if (dump_enabled_p ())
916 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
917 "vect_recog_widen_sum_pattern: detected: ");
918 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
921 /* We don't allow changing the order of the computation in the inner-loop
922 when doing outer-loop vectorization. */
923 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
925 return pattern_stmt;
929 /* Return TRUE if the operation in STMT can be performed on a smaller type.
931 Input:
932 STMT - a statement to check.
933 DEF - we support operations with two operands, one of which is constant.
934 The other operand can be defined by a demotion operation, or by a
935 previous statement in a sequence of over-promoted operations. In the
936 later case DEF is used to replace that operand. (It is defined by a
937 pattern statement we created for the previous statement in the
938 sequence).
940 Input/output:
941 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
942 NULL, it's the type of DEF.
943 STMTS - additional pattern statements. If a pattern statement (type
944 conversion) is created in this function, its original statement is
945 added to STMTS.
947 Output:
948 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
949 operands to use in the new pattern statement for STMT (will be created
950 in vect_recog_over_widening_pattern ()).
951 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
952 statements for STMT: the first one is a type promotion and the second
953 one is the operation itself. We return the type promotion statement
954 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
955 the second pattern statement. */
957 static bool
958 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
959 tree *op0, tree *op1, gimple *new_def_stmt,
960 vec<gimple> *stmts)
962 enum tree_code code;
963 tree const_oprnd, oprnd;
964 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
965 gimple def_stmt, new_stmt;
966 bool first = false;
967 bool promotion;
969 *op0 = NULL_TREE;
970 *op1 = NULL_TREE;
971 *new_def_stmt = NULL;
973 if (!is_gimple_assign (stmt))
974 return false;
976 code = gimple_assign_rhs_code (stmt);
977 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
978 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
979 return false;
981 oprnd = gimple_assign_rhs1 (stmt);
982 const_oprnd = gimple_assign_rhs2 (stmt);
983 type = gimple_expr_type (stmt);
985 if (TREE_CODE (oprnd) != SSA_NAME
986 || TREE_CODE (const_oprnd) != INTEGER_CST)
987 return false;
989 /* If oprnd has other uses besides that in stmt we cannot mark it
990 as being part of a pattern only. */
991 if (!has_single_use (oprnd))
992 return false;
994 /* If we are in the middle of a sequence, we use DEF from a previous
995 statement. Otherwise, OPRND has to be a result of type promotion. */
996 if (*new_type)
998 half_type = *new_type;
999 oprnd = def;
1001 else
1003 first = true;
1004 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1005 &promotion)
1006 || !promotion
1007 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1008 return false;
1011 /* Can we perform the operation on a smaller type? */
1012 switch (code)
1014 case BIT_IOR_EXPR:
1015 case BIT_XOR_EXPR:
1016 case BIT_AND_EXPR:
1017 if (!int_fits_type_p (const_oprnd, half_type))
1019 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1020 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1021 return false;
1023 interm_type = build_nonstandard_integer_type (
1024 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1025 if (!int_fits_type_p (const_oprnd, interm_type))
1026 return false;
1029 break;
1031 case LSHIFT_EXPR:
1032 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1033 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1034 return false;
1036 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1037 (e.g., if the original value was char, the shift amount is at most 8
1038 if we want to use short). */
1039 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1040 return false;
1042 interm_type = build_nonstandard_integer_type (
1043 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1045 if (!vect_supportable_shift (code, interm_type))
1046 return false;
1048 break;
1050 case RSHIFT_EXPR:
1051 if (vect_supportable_shift (code, half_type))
1052 break;
1054 /* Try intermediate type - HALF_TYPE is not supported. */
1055 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1056 return false;
1058 interm_type = build_nonstandard_integer_type (
1059 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1061 if (!vect_supportable_shift (code, interm_type))
1062 return false;
1064 break;
1066 default:
1067 gcc_unreachable ();
1070 /* There are four possible cases:
1071 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1072 the first statement in the sequence)
1073 a. The original, HALF_TYPE, is not enough - we replace the promotion
1074 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1075 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1076 promotion.
1077 2. OPRND is defined by a pattern statement we created.
1078 a. Its type is not sufficient for the operation, we create a new stmt:
1079 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1080 this statement in NEW_DEF_STMT, and it is later put in
1081 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1082 b. OPRND is good to use in the new statement. */
1083 if (first)
1085 if (interm_type)
1087 /* Replace the original type conversion HALF_TYPE->TYPE with
1088 HALF_TYPE->INTERM_TYPE. */
1089 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1091 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1092 /* Check if the already created pattern stmt is what we need. */
1093 if (!is_gimple_assign (new_stmt)
1094 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1095 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1096 return false;
1098 stmts->safe_push (def_stmt);
1099 oprnd = gimple_assign_lhs (new_stmt);
1101 else
1103 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1104 oprnd = gimple_assign_rhs1 (def_stmt);
1105 new_oprnd = make_ssa_name (interm_type, NULL);
1106 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1107 oprnd, NULL_TREE);
1108 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1109 stmts->safe_push (def_stmt);
1110 oprnd = new_oprnd;
1113 else
1115 /* Retrieve the operand before the type promotion. */
1116 oprnd = gimple_assign_rhs1 (def_stmt);
1119 else
1121 if (interm_type)
1123 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1124 new_oprnd = make_ssa_name (interm_type, NULL);
1125 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1126 oprnd, NULL_TREE);
1127 oprnd = new_oprnd;
1128 *new_def_stmt = new_stmt;
1131 /* Otherwise, OPRND is already set. */
1134 if (interm_type)
1135 *new_type = interm_type;
1136 else
1137 *new_type = half_type;
1139 *op0 = oprnd;
1140 *op1 = fold_convert (*new_type, const_oprnd);
1142 return true;
1146 /* Try to find a statement or a sequence of statements that can be performed
1147 on a smaller type:
1149 type x_t;
1150 TYPE x_T, res0_T, res1_T;
1151 loop:
1152 S1 x_t = *p;
1153 S2 x_T = (TYPE) x_t;
1154 S3 res0_T = op (x_T, C0);
1155 S4 res1_T = op (res0_T, C1);
1156 S5 ... = () res1_T; - type demotion
1158 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1159 constants.
1160 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1161 be 'type' or some intermediate type. For now, we expect S5 to be a type
1162 demotion operation. We also check that S3 and S4 have only one use. */
1164 static gimple
1165 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1166 tree *type_in, tree *type_out)
1168 gimple stmt = stmts->pop ();
1169 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1170 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1171 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1172 bool first;
1173 tree type = NULL;
1175 first = true;
1176 while (1)
1178 if (!vinfo_for_stmt (stmt)
1179 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1180 return NULL;
1182 new_def_stmt = NULL;
1183 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1184 &op0, &op1, &new_def_stmt,
1185 stmts))
1187 if (first)
1188 return NULL;
1189 else
1190 break;
1193 /* STMT can be performed on a smaller type. Check its uses. */
1194 use_stmt = vect_single_imm_use (stmt);
1195 if (!use_stmt || !is_gimple_assign (use_stmt))
1196 return NULL;
1198 /* Create pattern statement for STMT. */
1199 vectype = get_vectype_for_scalar_type (new_type);
1200 if (!vectype)
1201 return NULL;
1203 /* We want to collect all the statements for which we create pattern
1204 statetments, except for the case when the last statement in the
1205 sequence doesn't have a corresponding pattern statement. In such
1206 case we associate the last pattern statement with the last statement
1207 in the sequence. Therefore, we only add the original statement to
1208 the list if we know that it is not the last. */
1209 if (prev_stmt)
1210 stmts->safe_push (prev_stmt);
1212 var = vect_recog_temp_ssa_var (new_type, NULL);
1213 pattern_stmt
1214 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1215 op0, op1);
1216 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1217 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1219 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1222 "created pattern stmt: ");
1223 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
1226 type = gimple_expr_type (stmt);
1227 prev_stmt = stmt;
1228 stmt = use_stmt;
1230 first = false;
1233 /* We got a sequence. We expect it to end with a type demotion operation.
1234 Otherwise, we quit (for now). There are three possible cases: the
1235 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1236 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1237 NEW_TYPE differs (we create a new conversion statement). */
1238 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1240 use_lhs = gimple_assign_lhs (use_stmt);
1241 use_type = TREE_TYPE (use_lhs);
1242 /* Support only type demotion or signedess change. */
1243 if (!INTEGRAL_TYPE_P (use_type)
1244 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1245 return NULL;
1247 /* Check that NEW_TYPE is not bigger than the conversion result. */
1248 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1249 return NULL;
1251 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1252 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1254 /* Create NEW_TYPE->USE_TYPE conversion. */
1255 new_oprnd = make_ssa_name (use_type, NULL);
1256 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1257 var, NULL_TREE);
1258 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1260 *type_in = get_vectype_for_scalar_type (new_type);
1261 *type_out = get_vectype_for_scalar_type (use_type);
1263 /* We created a pattern statement for the last statement in the
1264 sequence, so we don't need to associate it with the pattern
1265 statement created for PREV_STMT. Therefore, we add PREV_STMT
1266 to the list in order to mark it later in vect_pattern_recog_1. */
1267 if (prev_stmt)
1268 stmts->safe_push (prev_stmt);
1270 else
1272 if (prev_stmt)
1273 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1274 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1276 *type_in = vectype;
1277 *type_out = NULL_TREE;
1280 stmts->safe_push (use_stmt);
1282 else
1283 /* TODO: support general case, create a conversion to the correct type. */
1284 return NULL;
1286 /* Pattern detected. */
1287 if (dump_enabled_p ())
1289 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1290 "vect_recog_over_widening_pattern: detected: ");
1291 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
1294 return pattern_stmt;
1297 /* Detect widening shift pattern:
1299 type a_t;
1300 TYPE a_T, res_T;
1302 S1 a_t = ;
1303 S2 a_T = (TYPE) a_t;
1304 S3 res_T = a_T << CONST;
1306 where type 'TYPE' is at least double the size of type 'type'.
1308 Also detect cases where the shift result is immediately converted
1309 to another type 'result_type' that is no larger in size than 'TYPE'.
1310 In those cases we perform a widen-shift that directly results in
1311 'result_type', to avoid a possible over-widening situation:
1313 type a_t;
1314 TYPE a_T, res_T;
1315 result_type res_result;
1317 S1 a_t = ;
1318 S2 a_T = (TYPE) a_t;
1319 S3 res_T = a_T << CONST;
1320 S4 res_result = (result_type) res_T;
1321 '--> res_result' = a_t w<< CONST;
1323 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1324 create an additional pattern stmt for S2 to create a variable of an
1325 intermediate type, and perform widen-shift on the intermediate type:
1327 type a_t;
1328 interm_type a_it;
1329 TYPE a_T, res_T, res_T';
1331 S1 a_t = ;
1332 S2 a_T = (TYPE) a_t;
1333 '--> a_it = (interm_type) a_t;
1334 S3 res_T = a_T << CONST;
1335 '--> res_T' = a_it <<* CONST;
1337 Input/Output:
1339 * STMTS: Contains a stmt from which the pattern search begins.
1340 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1341 in STMTS. When an intermediate type is used and a pattern statement is
1342 created for S2, we also put S2 here (before S3).
1344 Output:
1346 * TYPE_IN: The type of the input arguments to the pattern.
1348 * TYPE_OUT: The type of the output of this pattern.
1350 * Return value: A new stmt that will be used to replace the sequence of
1351 stmts that constitute the pattern. In this case it will be:
1352 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1354 static gimple
1355 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1356 tree *type_in, tree *type_out)
1358 gimple last_stmt = stmts->pop ();
1359 gimple def_stmt0;
1360 tree oprnd0, oprnd1;
1361 tree type, half_type0;
1362 gimple pattern_stmt;
1363 tree vectype, vectype_out = NULL_TREE;
1364 tree var;
1365 enum tree_code dummy_code;
1366 int dummy_int;
1367 vec<tree> dummy_vec;
1368 gimple use_stmt;
1369 bool promotion;
1371 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1372 return NULL;
1374 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1375 return NULL;
1377 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1378 return NULL;
1380 oprnd0 = gimple_assign_rhs1 (last_stmt);
1381 oprnd1 = gimple_assign_rhs2 (last_stmt);
1382 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1383 return NULL;
1385 /* Check operand 0: it has to be defined by a type promotion. */
1386 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1387 &promotion)
1388 || !promotion)
1389 return NULL;
1391 /* Check operand 1: has to be positive. We check that it fits the type
1392 in vect_handle_widen_op_by_const (). */
1393 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1394 return NULL;
1396 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1397 type = gimple_expr_type (last_stmt);
1399 /* Check for subsequent conversion to another type. */
1400 use_stmt = vect_single_imm_use (last_stmt);
1401 if (use_stmt && is_gimple_assign (use_stmt)
1402 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1403 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1405 tree use_lhs = gimple_assign_lhs (use_stmt);
1406 tree use_type = TREE_TYPE (use_lhs);
1408 if (INTEGRAL_TYPE_P (use_type)
1409 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1411 last_stmt = use_stmt;
1412 type = use_type;
1416 /* Check if this a widening operation. */
1417 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1418 &oprnd0, stmts,
1419 type, &half_type0, def_stmt0))
1420 return NULL;
1422 /* Pattern detected. */
1423 if (dump_enabled_p ())
1424 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1425 "vect_recog_widen_shift_pattern: detected: ");
1427 /* Check target support. */
1428 vectype = get_vectype_for_scalar_type (half_type0);
1429 vectype_out = get_vectype_for_scalar_type (type);
1431 if (!vectype
1432 || !vectype_out
1433 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1434 vectype_out, vectype,
1435 &dummy_code, &dummy_code,
1436 &dummy_int, &dummy_vec))
1437 return NULL;
1439 *type_in = vectype;
1440 *type_out = vectype_out;
1442 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1443 var = vect_recog_temp_ssa_var (type, NULL);
1444 pattern_stmt =
1445 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1447 if (dump_enabled_p ())
1448 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1450 stmts->safe_push (last_stmt);
1451 return pattern_stmt;
1454 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1455 vectorized:
1457 type a_t;
1458 TYPE b_T, res_T;
1460 S1 a_t = ;
1461 S2 b_T = ;
1462 S3 res_T = b_T op a_t;
1464 where type 'TYPE' is a type with different size than 'type',
1465 and op is <<, >> or rotate.
1467 Also detect cases:
1469 type a_t;
1470 TYPE b_T, c_T, res_T;
1472 S0 c_T = ;
1473 S1 a_t = (type) c_T;
1474 S2 b_T = ;
1475 S3 res_T = b_T op a_t;
1477 Input/Output:
1479 * STMTS: Contains a stmt from which the pattern search begins,
1480 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1481 with a shift/rotate which has same type on both operands, in the
1482 second case just b_T op c_T, in the first case with added cast
1483 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
1485 Output:
1487 * TYPE_IN: The type of the input arguments to the pattern.
1489 * TYPE_OUT: The type of the output of this pattern.
1491 * Return value: A new stmt that will be used to replace the shift/rotate
1492 S3 stmt. */
1494 static gimple
1495 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
1496 tree *type_in, tree *type_out)
1498 gimple last_stmt = stmts->pop ();
1499 tree oprnd0, oprnd1, lhs, var;
1500 gimple pattern_stmt, def_stmt;
1501 enum tree_code rhs_code;
1502 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1503 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1504 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1505 enum vect_def_type dt;
1506 tree def;
1508 if (!is_gimple_assign (last_stmt))
1509 return NULL;
1511 rhs_code = gimple_assign_rhs_code (last_stmt);
1512 switch (rhs_code)
1514 case LSHIFT_EXPR:
1515 case RSHIFT_EXPR:
1516 case LROTATE_EXPR:
1517 case RROTATE_EXPR:
1518 break;
1519 default:
1520 return NULL;
1523 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1524 return NULL;
1526 lhs = gimple_assign_lhs (last_stmt);
1527 oprnd0 = gimple_assign_rhs1 (last_stmt);
1528 oprnd1 = gimple_assign_rhs2 (last_stmt);
1529 if (TREE_CODE (oprnd0) != SSA_NAME
1530 || TREE_CODE (oprnd1) != SSA_NAME
1531 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1532 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1533 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1534 || TYPE_PRECISION (TREE_TYPE (lhs))
1535 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1536 return NULL;
1538 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1539 &def, &dt))
1540 return NULL;
1542 if (dt != vect_internal_def)
1543 return NULL;
1545 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1546 *type_out = *type_in;
1547 if (*type_in == NULL_TREE)
1548 return NULL;
1550 def = NULL_TREE;
1551 if (gimple_assign_cast_p (def_stmt))
1553 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1554 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1555 && TYPE_PRECISION (TREE_TYPE (rhs1))
1556 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1557 def = rhs1;
1560 if (def == NULL_TREE)
1562 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1563 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1564 NULL_TREE);
1565 new_pattern_def_seq (stmt_vinfo, def_stmt);
1568 /* Pattern detected. */
1569 if (dump_enabled_p ())
1570 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1571 "vect_recog_vector_vector_shift_pattern: detected: ");
1573 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1574 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1575 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1577 if (dump_enabled_p ())
1578 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1580 stmts->safe_push (last_stmt);
1581 return pattern_stmt;
1584 /* Detect a signed division by a constant that wouldn't be
1585 otherwise vectorized:
1587 type a_t, b_t;
1589 S1 a_t = b_t / N;
1591 where type 'type' is an integral type and N is a constant.
1593 Similarly handle modulo by a constant:
1595 S4 a_t = b_t % N;
1597 Input/Output:
1599 * STMTS: Contains a stmt from which the pattern search begins,
1600 i.e. the division stmt. S1 is replaced by if N is a power
1601 of two constant and type is signed:
1602 S3 y_t = b_t < 0 ? N - 1 : 0;
1603 S2 x_t = b_t + y_t;
1604 S1' a_t = x_t >> log2 (N);
1606 S4 is replaced if N is a power of two constant and
1607 type is signed by (where *_T temporaries have unsigned type):
1608 S9 y_T = b_t < 0 ? -1U : 0U;
1609 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1610 S7 z_t = (type) z_T;
1611 S6 w_t = b_t + z_t;
1612 S5 x_t = w_t & (N - 1);
1613 S4' a_t = x_t - z_t;
1615 Output:
1617 * TYPE_IN: The type of the input arguments to the pattern.
1619 * TYPE_OUT: The type of the output of this pattern.
1621 * Return value: A new stmt that will be used to replace the division
1622 S1 or modulo S4 stmt. */
1624 static gimple
1625 vect_recog_divmod_pattern (vec<gimple> *stmts,
1626 tree *type_in, tree *type_out)
1628 gimple last_stmt = stmts->pop ();
1629 tree oprnd0, oprnd1, vectype, itype, cond;
1630 gimple pattern_stmt, def_stmt;
1631 enum tree_code rhs_code;
1632 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1633 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1634 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1635 optab optab;
1636 tree q;
1637 int dummy_int, prec;
1638 stmt_vec_info def_stmt_vinfo;
1640 if (!is_gimple_assign (last_stmt))
1641 return NULL;
1643 rhs_code = gimple_assign_rhs_code (last_stmt);
1644 switch (rhs_code)
1646 case TRUNC_DIV_EXPR:
1647 case TRUNC_MOD_EXPR:
1648 break;
1649 default:
1650 return NULL;
1653 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1654 return NULL;
1656 oprnd0 = gimple_assign_rhs1 (last_stmt);
1657 oprnd1 = gimple_assign_rhs2 (last_stmt);
1658 itype = TREE_TYPE (oprnd0);
1659 if (TREE_CODE (oprnd0) != SSA_NAME
1660 || TREE_CODE (oprnd1) != INTEGER_CST
1661 || TREE_CODE (itype) != INTEGER_TYPE
1662 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1663 return NULL;
1665 vectype = get_vectype_for_scalar_type (itype);
1666 if (vectype == NULL_TREE)
1667 return NULL;
1669 /* If the target can handle vectorized division or modulo natively,
1670 don't attempt to optimize this. */
1671 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1672 if (optab != unknown_optab)
1674 enum machine_mode vec_mode = TYPE_MODE (vectype);
1675 int icode = (int) optab_handler (optab, vec_mode);
1676 if (icode != CODE_FOR_nothing)
1677 return NULL;
1680 prec = TYPE_PRECISION (itype);
1681 if (integer_pow2p (oprnd1))
1683 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1684 return NULL;
1686 /* Pattern detected. */
1687 if (dump_enabled_p ())
1688 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1689 "vect_recog_divmod_pattern: detected: ");
1691 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1692 build_int_cst (itype, 0));
1693 if (rhs_code == TRUNC_DIV_EXPR)
1695 tree var = vect_recog_temp_ssa_var (itype, NULL);
1696 tree shift;
1697 def_stmt
1698 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1699 fold_build2 (MINUS_EXPR, itype,
1700 oprnd1,
1701 build_int_cst (itype,
1702 1)),
1703 build_int_cst (itype, 0));
1704 new_pattern_def_seq (stmt_vinfo, def_stmt);
1705 var = vect_recog_temp_ssa_var (itype, NULL);
1706 def_stmt
1707 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1708 gimple_assign_lhs (def_stmt));
1709 append_pattern_def_seq (stmt_vinfo, def_stmt);
1711 shift = build_int_cst (itype, tree_log2 (oprnd1));
1712 pattern_stmt
1713 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1714 vect_recog_temp_ssa_var (itype,
1715 NULL),
1716 var, shift);
1718 else
1720 tree signmask;
1721 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1722 if (compare_tree_int (oprnd1, 2) == 0)
1724 signmask = vect_recog_temp_ssa_var (itype, NULL);
1725 def_stmt
1726 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1727 build_int_cst (itype, 1),
1728 build_int_cst (itype, 0));
1729 append_pattern_def_seq (stmt_vinfo, def_stmt);
1731 else
1733 tree utype
1734 = build_nonstandard_integer_type (prec, 1);
1735 tree vecutype = get_vectype_for_scalar_type (utype);
1736 tree shift
1737 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1738 - tree_log2 (oprnd1));
1739 tree var = vect_recog_temp_ssa_var (utype, NULL);
1741 def_stmt
1742 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1743 build_int_cst (utype, -1),
1744 build_int_cst (utype, 0));
1745 def_stmt_vinfo
1746 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1747 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1748 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1749 append_pattern_def_seq (stmt_vinfo, def_stmt);
1750 var = vect_recog_temp_ssa_var (utype, NULL);
1751 def_stmt
1752 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1753 gimple_assign_lhs (def_stmt),
1754 shift);
1755 def_stmt_vinfo
1756 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1757 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1758 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1759 append_pattern_def_seq (stmt_vinfo, def_stmt);
1760 signmask = vect_recog_temp_ssa_var (itype, NULL);
1761 def_stmt
1762 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1763 NULL_TREE);
1764 append_pattern_def_seq (stmt_vinfo, def_stmt);
1766 def_stmt
1767 = gimple_build_assign_with_ops (PLUS_EXPR,
1768 vect_recog_temp_ssa_var (itype,
1769 NULL),
1770 oprnd0, signmask);
1771 append_pattern_def_seq (stmt_vinfo, def_stmt);
1772 def_stmt
1773 = gimple_build_assign_with_ops (BIT_AND_EXPR,
1774 vect_recog_temp_ssa_var (itype,
1775 NULL),
1776 gimple_assign_lhs (def_stmt),
1777 fold_build2 (MINUS_EXPR, itype,
1778 oprnd1,
1779 build_int_cst (itype,
1780 1)));
1781 append_pattern_def_seq (stmt_vinfo, def_stmt);
1783 pattern_stmt
1784 = gimple_build_assign_with_ops (MINUS_EXPR,
1785 vect_recog_temp_ssa_var (itype,
1786 NULL),
1787 gimple_assign_lhs (def_stmt),
1788 signmask);
1791 if (dump_enabled_p ())
1792 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
1795 stmts->safe_push (last_stmt);
1797 *type_in = vectype;
1798 *type_out = vectype;
1799 return pattern_stmt;
1802 if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
1803 || integer_zerop (oprnd1)
1804 || prec > HOST_BITS_PER_WIDE_INT)
1805 return NULL;
1807 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
1808 return NULL;
1810 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1812 if (TYPE_UNSIGNED (itype))
1814 unsigned HOST_WIDE_INT mh, ml;
1815 int pre_shift, post_shift;
1816 unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
1817 & GET_MODE_MASK (TYPE_MODE (itype));
1818 tree t1, t2, t3, t4;
1820 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
1821 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
1822 return NULL;
1824 /* Find a suitable multiplier and right shift count
1825 instead of multiplying with D. */
1826 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
1828 /* If the suggested multiplier is more than SIZE bits, we can do better
1829 for even divisors, using an initial right shift. */
1830 if (mh != 0 && (d & 1) == 0)
1832 pre_shift = floor_log2 (d & -d);
1833 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
1834 &ml, &post_shift, &dummy_int);
1835 gcc_assert (!mh);
1837 else
1838 pre_shift = 0;
1840 if (mh != 0)
1842 if (post_shift - 1 >= prec)
1843 return NULL;
1845 /* t1 = oprnd0 h* ml;
1846 t2 = oprnd0 - t1;
1847 t3 = t2 >> 1;
1848 t4 = t1 + t3;
1849 q = t4 >> (post_shift - 1); */
1850 t1 = vect_recog_temp_ssa_var (itype, NULL);
1851 def_stmt
1852 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
1853 build_int_cst (itype, ml));
1854 append_pattern_def_seq (stmt_vinfo, def_stmt);
1856 t2 = vect_recog_temp_ssa_var (itype, NULL);
1857 def_stmt
1858 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
1859 append_pattern_def_seq (stmt_vinfo, def_stmt);
1861 t3 = vect_recog_temp_ssa_var (itype, NULL);
1862 def_stmt
1863 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
1864 integer_one_node);
1865 append_pattern_def_seq (stmt_vinfo, def_stmt);
1867 t4 = vect_recog_temp_ssa_var (itype, NULL);
1868 def_stmt
1869 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
1871 if (post_shift != 1)
1873 append_pattern_def_seq (stmt_vinfo, def_stmt);
1875 q = vect_recog_temp_ssa_var (itype, NULL);
1876 pattern_stmt
1877 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
1878 build_int_cst (itype,
1879 post_shift
1880 - 1));
1882 else
1884 q = t4;
1885 pattern_stmt = def_stmt;
1888 else
1890 if (pre_shift >= prec || post_shift >= prec)
1891 return NULL;
1893 /* t1 = oprnd0 >> pre_shift;
1894 t2 = t1 h* ml;
1895 q = t2 >> post_shift; */
1896 if (pre_shift)
1898 t1 = vect_recog_temp_ssa_var (itype, NULL);
1899 def_stmt
1900 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
1901 build_int_cst (NULL,
1902 pre_shift));
1903 append_pattern_def_seq (stmt_vinfo, def_stmt);
1905 else
1906 t1 = oprnd0;
1908 t2 = vect_recog_temp_ssa_var (itype, NULL);
1909 def_stmt
1910 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
1911 build_int_cst (itype, ml));
1913 if (post_shift)
1915 append_pattern_def_seq (stmt_vinfo, def_stmt);
1917 q = vect_recog_temp_ssa_var (itype, NULL);
1918 def_stmt
1919 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
1920 build_int_cst (itype,
1921 post_shift));
1923 else
1924 q = t2;
1926 pattern_stmt = def_stmt;
1929 else
1931 unsigned HOST_WIDE_INT ml;
1932 int post_shift;
1933 HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
1934 unsigned HOST_WIDE_INT abs_d;
1935 bool add = false;
1936 tree t1, t2, t3, t4;
1938 /* Give up for -1. */
1939 if (d == -1)
1940 return NULL;
1942 /* Since d might be INT_MIN, we have to cast to
1943 unsigned HOST_WIDE_INT before negating to avoid
1944 undefined signed overflow. */
1945 abs_d = (d >= 0
1946 ? (unsigned HOST_WIDE_INT) d
1947 : - (unsigned HOST_WIDE_INT) d);
1949 /* n rem d = n rem -d */
1950 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
1952 d = abs_d;
1953 oprnd1 = build_int_cst (itype, abs_d);
1955 else if (HOST_BITS_PER_WIDE_INT >= prec
1956 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
1957 /* This case is not handled correctly below. */
1958 return NULL;
1960 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
1961 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
1963 add = true;
1964 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
1966 if (post_shift >= prec)
1967 return NULL;
1969 /* t1 = oprnd1 h* ml; */
1970 t1 = vect_recog_temp_ssa_var (itype, NULL);
1971 def_stmt
1972 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
1973 build_int_cst (itype, ml));
1974 append_pattern_def_seq (stmt_vinfo, def_stmt);
1976 if (add)
1978 /* t2 = t1 + oprnd0; */
1979 t2 = vect_recog_temp_ssa_var (itype, NULL);
1980 def_stmt
1981 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
1982 append_pattern_def_seq (stmt_vinfo, def_stmt);
1984 else
1985 t2 = t1;
1987 if (post_shift)
1989 /* t3 = t2 >> post_shift; */
1990 t3 = vect_recog_temp_ssa_var (itype, NULL);
1991 def_stmt
1992 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
1993 build_int_cst (itype, post_shift));
1994 append_pattern_def_seq (stmt_vinfo, def_stmt);
1996 else
1997 t3 = t2;
1999 /* t4 = oprnd0 >> (prec - 1); */
2000 t4 = vect_recog_temp_ssa_var (itype, NULL);
2001 def_stmt
2002 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2003 build_int_cst (itype, prec - 1));
2004 append_pattern_def_seq (stmt_vinfo, def_stmt);
2006 /* q = t3 - t4; or q = t4 - t3; */
2007 q = vect_recog_temp_ssa_var (itype, NULL);
2008 pattern_stmt
2009 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2010 d < 0 ? t3 : t4);
2013 if (rhs_code == TRUNC_MOD_EXPR)
2015 tree r, t1;
2017 /* We divided. Now finish by:
2018 t1 = q * oprnd1;
2019 r = oprnd0 - t1; */
2020 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2022 t1 = vect_recog_temp_ssa_var (itype, NULL);
2023 def_stmt
2024 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2025 append_pattern_def_seq (stmt_vinfo, def_stmt);
2027 r = vect_recog_temp_ssa_var (itype, NULL);
2028 pattern_stmt
2029 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2032 /* Pattern detected. */
2033 if (dump_enabled_p ())
2035 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2036 "vect_recog_divmod_pattern: detected: ");
2037 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2040 stmts->safe_push (last_stmt);
2042 *type_in = vectype;
2043 *type_out = vectype;
2044 return pattern_stmt;
2047 /* Function vect_recog_mixed_size_cond_pattern
2049 Try to find the following pattern:
2051 type x_t, y_t;
2052 TYPE a_T, b_T, c_T;
2053 loop:
2054 S1 a_T = x_t CMP y_t ? b_T : c_T;
2056 where type 'TYPE' is an integral type which has different size
2057 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2058 than 'type', the constants need to fit into an integer type
2059 with the same width as 'type') or results of conversion from 'type'.
2061 Input:
2063 * LAST_STMT: A stmt from which the pattern search begins.
2065 Output:
2067 * TYPE_IN: The type of the input arguments to the pattern.
2069 * TYPE_OUT: The type of the output of this pattern.
2071 * Return value: A new stmt that will be used to replace the pattern.
2072 Additionally a def_stmt is added.
2074 a_it = x_t CMP y_t ? b_it : c_it;
2075 a_T = (TYPE) a_it; */
2077 static gimple
2078 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2079 tree *type_out)
2081 gimple last_stmt = (*stmts)[0];
2082 tree cond_expr, then_clause, else_clause;
2083 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2084 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2085 enum machine_mode cmpmode;
2086 gimple pattern_stmt, def_stmt;
2087 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2088 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2089 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2090 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2091 bool promotion;
2092 tree comp_scalar_type;
2094 if (!is_gimple_assign (last_stmt)
2095 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2096 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2097 return NULL;
2099 cond_expr = gimple_assign_rhs1 (last_stmt);
2100 then_clause = gimple_assign_rhs2 (last_stmt);
2101 else_clause = gimple_assign_rhs3 (last_stmt);
2103 if (!COMPARISON_CLASS_P (cond_expr))
2104 return NULL;
2106 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2107 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2108 if (comp_vectype == NULL_TREE)
2109 return NULL;
2111 type = gimple_expr_type (last_stmt);
2112 if (types_compatible_p (type, comp_scalar_type)
2113 || ((TREE_CODE (then_clause) != INTEGER_CST
2114 || TREE_CODE (else_clause) != INTEGER_CST)
2115 && !INTEGRAL_TYPE_P (comp_scalar_type))
2116 || !INTEGRAL_TYPE_P (type))
2117 return NULL;
2119 if ((TREE_CODE (then_clause) != INTEGER_CST
2120 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2121 &def_stmt0, &promotion))
2122 || (TREE_CODE (else_clause) != INTEGER_CST
2123 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2124 &def_stmt1, &promotion)))
2125 return NULL;
2127 if (orig_type0 && orig_type1
2128 && !types_compatible_p (orig_type0, orig_type1))
2129 return NULL;
2131 if (orig_type0)
2133 if (!types_compatible_p (orig_type0, comp_scalar_type))
2134 return NULL;
2135 then_clause = gimple_assign_rhs1 (def_stmt0);
2136 itype = orig_type0;
2139 if (orig_type1)
2141 if (!types_compatible_p (orig_type1, comp_scalar_type))
2142 return NULL;
2143 else_clause = gimple_assign_rhs1 (def_stmt1);
2144 itype = orig_type1;
2147 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2149 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2150 return NULL;
2152 vectype = get_vectype_for_scalar_type (type);
2153 if (vectype == NULL_TREE)
2154 return NULL;
2156 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2157 return NULL;
2159 if (itype == NULL_TREE)
2160 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2161 TYPE_UNSIGNED (type));
2163 if (itype == NULL_TREE
2164 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2165 return NULL;
2167 vecitype = get_vectype_for_scalar_type (itype);
2168 if (vecitype == NULL_TREE)
2169 return NULL;
2171 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2172 return NULL;
2174 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2176 if ((TREE_CODE (then_clause) == INTEGER_CST
2177 && !int_fits_type_p (then_clause, itype))
2178 || (TREE_CODE (else_clause) == INTEGER_CST
2179 && !int_fits_type_p (else_clause, itype)))
2180 return NULL;
2183 def_stmt
2184 = gimple_build_assign_with_ops (COND_EXPR,
2185 vect_recog_temp_ssa_var (itype, NULL),
2186 unshare_expr (cond_expr),
2187 fold_convert (itype, then_clause),
2188 fold_convert (itype, else_clause));
2189 pattern_stmt
2190 = gimple_build_assign_with_ops (NOP_EXPR,
2191 vect_recog_temp_ssa_var (type, NULL),
2192 gimple_assign_lhs (def_stmt), NULL_TREE);
2194 new_pattern_def_seq (stmt_vinfo, def_stmt);
2195 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2196 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2197 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2198 *type_in = vecitype;
2199 *type_out = vectype;
2201 if (dump_enabled_p ())
2202 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2203 "vect_recog_mixed_size_cond_pattern: detected: ");
2205 return pattern_stmt;
2209 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2210 true if bool VAR can be optimized that way. */
2212 static bool
2213 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2215 gimple def_stmt;
2216 enum vect_def_type dt;
2217 tree def, rhs1;
2218 enum tree_code rhs_code;
2220 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2221 &dt))
2222 return false;
2224 if (dt != vect_internal_def)
2225 return false;
2227 if (!is_gimple_assign (def_stmt))
2228 return false;
2230 if (!has_single_use (def))
2231 return false;
2233 rhs1 = gimple_assign_rhs1 (def_stmt);
2234 rhs_code = gimple_assign_rhs_code (def_stmt);
2235 switch (rhs_code)
2237 case SSA_NAME:
2238 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2240 CASE_CONVERT:
2241 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2242 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2243 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2244 return false;
2245 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2247 case BIT_NOT_EXPR:
2248 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2250 case BIT_AND_EXPR:
2251 case BIT_IOR_EXPR:
2252 case BIT_XOR_EXPR:
2253 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2254 return false;
2255 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2256 bb_vinfo);
2258 default:
2259 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2261 tree vecitype, comp_vectype;
2263 /* If the comparison can throw, then is_gimple_condexpr will be
2264 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2265 if (stmt_could_throw_p (def_stmt))
2266 return false;
2268 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2269 if (comp_vectype == NULL_TREE)
2270 return false;
2272 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2274 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2275 tree itype
2276 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2277 vecitype = get_vectype_for_scalar_type (itype);
2278 if (vecitype == NULL_TREE)
2279 return false;
2281 else
2282 vecitype = comp_vectype;
2283 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2285 return false;
2290 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2291 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2292 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2294 static tree
2295 adjust_bool_pattern_cast (tree type, tree var)
2297 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2298 gimple cast_stmt, pattern_stmt;
2300 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2301 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2302 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2303 cast_stmt
2304 = gimple_build_assign_with_ops (NOP_EXPR,
2305 vect_recog_temp_ssa_var (type, NULL),
2306 gimple_assign_lhs (pattern_stmt),
2307 NULL_TREE);
2308 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2309 return gimple_assign_lhs (cast_stmt);
2313 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2314 recursively. VAR is an SSA_NAME that should be transformed from bool
2315 to a wider integer type, OUT_TYPE is the desired final integer type of
2316 the whole pattern, TRUEVAL should be NULL unless optimizing
2317 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2318 in the then_clause, STMTS is where statements with added pattern stmts
2319 should be pushed to. */
2321 static tree
2322 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2323 vec<gimple> *stmts)
2325 gimple stmt = SSA_NAME_DEF_STMT (var);
2326 enum tree_code rhs_code, def_rhs_code;
2327 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2328 location_t loc;
2329 gimple pattern_stmt, def_stmt;
2331 rhs1 = gimple_assign_rhs1 (stmt);
2332 rhs2 = gimple_assign_rhs2 (stmt);
2333 rhs_code = gimple_assign_rhs_code (stmt);
2334 loc = gimple_location (stmt);
2335 switch (rhs_code)
2337 case SSA_NAME:
2338 CASE_CONVERT:
2339 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2340 itype = TREE_TYPE (irhs1);
2341 pattern_stmt
2342 = gimple_build_assign_with_ops (SSA_NAME,
2343 vect_recog_temp_ssa_var (itype, NULL),
2344 irhs1, NULL_TREE);
2345 break;
2347 case BIT_NOT_EXPR:
2348 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2349 itype = TREE_TYPE (irhs1);
2350 pattern_stmt
2351 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2352 vect_recog_temp_ssa_var (itype, NULL),
2353 irhs1, build_int_cst (itype, 1));
2354 break;
2356 case BIT_AND_EXPR:
2357 /* Try to optimize x = y & (a < b ? 1 : 0); into
2358 x = (a < b ? y : 0);
2360 E.g. for:
2361 bool a_b, b_b, c_b;
2362 TYPE d_T;
2364 S1 a_b = x1 CMP1 y1;
2365 S2 b_b = x2 CMP2 y2;
2366 S3 c_b = a_b & b_b;
2367 S4 d_T = (TYPE) c_b;
2369 we would normally emit:
2371 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2372 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2373 S3' c_T = a_T & b_T;
2374 S4' d_T = c_T;
2376 but we can save one stmt by using the
2377 result of one of the COND_EXPRs in the other COND_EXPR and leave
2378 BIT_AND_EXPR stmt out:
2380 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2381 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2382 S4' f_T = c_T;
2384 At least when VEC_COND_EXPR is implemented using masks
2385 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2386 computes the comparison masks and ands it, in one case with
2387 all ones vector, in the other case with a vector register.
2388 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2389 often more expensive. */
2390 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2391 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2392 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2394 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2395 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2396 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2397 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2399 gimple tstmt;
2400 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2401 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2402 tstmt = stmts->pop ();
2403 gcc_assert (tstmt == def_stmt);
2404 stmts->quick_push (stmt);
2405 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2406 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2407 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2408 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2409 return irhs2;
2411 else
2412 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2413 goto and_ior_xor;
2415 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2416 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2417 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2419 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2420 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2421 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2422 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2424 gimple tstmt;
2425 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2426 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2427 tstmt = stmts->pop ();
2428 gcc_assert (tstmt == def_stmt);
2429 stmts->quick_push (stmt);
2430 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2431 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2432 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2433 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2434 return irhs1;
2436 else
2437 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2438 goto and_ior_xor;
2440 /* FALLTHRU */
2441 case BIT_IOR_EXPR:
2442 case BIT_XOR_EXPR:
2443 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2444 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2445 and_ior_xor:
2446 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2447 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2449 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2450 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2451 int out_prec = TYPE_PRECISION (out_type);
2452 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2453 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2454 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2455 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2456 else
2458 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2459 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2462 itype = TREE_TYPE (irhs1);
2463 pattern_stmt
2464 = gimple_build_assign_with_ops (rhs_code,
2465 vect_recog_temp_ssa_var (itype, NULL),
2466 irhs1, irhs2);
2467 break;
2469 default:
2470 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2471 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2472 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2473 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2474 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2476 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2477 itype
2478 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2480 else
2481 itype = TREE_TYPE (rhs1);
2482 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2483 if (trueval == NULL_TREE)
2484 trueval = build_int_cst (itype, 1);
2485 else
2486 gcc_checking_assert (useless_type_conversion_p (itype,
2487 TREE_TYPE (trueval)));
2488 pattern_stmt
2489 = gimple_build_assign_with_ops (COND_EXPR,
2490 vect_recog_temp_ssa_var (itype, NULL),
2491 cond_expr, trueval,
2492 build_int_cst (itype, 0));
2493 break;
2496 stmts->safe_push (stmt);
2497 gimple_set_location (pattern_stmt, loc);
2498 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2499 return gimple_assign_lhs (pattern_stmt);
2503 /* Function vect_recog_bool_pattern
2505 Try to find pattern like following:
2507 bool a_b, b_b, c_b, d_b, e_b;
2508 TYPE f_T;
2509 loop:
2510 S1 a_b = x1 CMP1 y1;
2511 S2 b_b = x2 CMP2 y2;
2512 S3 c_b = a_b & b_b;
2513 S4 d_b = x3 CMP3 y3;
2514 S5 e_b = c_b | d_b;
2515 S6 f_T = (TYPE) e_b;
2517 where type 'TYPE' is an integral type.
2519 Input:
2521 * LAST_STMT: A stmt at the end from which the pattern
2522 search begins, i.e. cast of a bool to
2523 an integer type.
2525 Output:
2527 * TYPE_IN: The type of the input arguments to the pattern.
2529 * TYPE_OUT: The type of the output of this pattern.
2531 * Return value: A new stmt that will be used to replace the pattern.
2533 Assuming size of TYPE is the same as size of all comparisons
2534 (otherwise some casts would be added where needed), the above
2535 sequence we create related pattern stmts:
2536 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2537 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2538 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2539 S5' e_T = c_T | d_T;
2540 S6' f_T = e_T;
2542 Instead of the above S3' we could emit:
2543 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2544 S3' c_T = a_T | b_T;
2545 but the above is more efficient. */
2547 static gimple
2548 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2549 tree *type_out)
2551 gimple last_stmt = stmts->pop ();
2552 enum tree_code rhs_code;
2553 tree var, lhs, rhs, vectype;
2554 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2555 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2556 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2557 gimple pattern_stmt;
2559 if (!is_gimple_assign (last_stmt))
2560 return NULL;
2562 var = gimple_assign_rhs1 (last_stmt);
2563 lhs = gimple_assign_lhs (last_stmt);
2565 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2566 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2567 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2568 return NULL;
2570 rhs_code = gimple_assign_rhs_code (last_stmt);
2571 if (CONVERT_EXPR_CODE_P (rhs_code))
2573 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2574 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2575 return NULL;
2576 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2577 if (vectype == NULL_TREE)
2578 return NULL;
2580 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2581 return NULL;
2583 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2584 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2585 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2586 pattern_stmt
2587 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2588 else
2589 pattern_stmt
2590 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2591 *type_out = vectype;
2592 *type_in = vectype;
2593 stmts->safe_push (last_stmt);
2594 if (dump_enabled_p ())
2595 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2596 "vect_recog_bool_pattern: detected: ");
2598 return pattern_stmt;
2600 else if (rhs_code == SSA_NAME
2601 && STMT_VINFO_DATA_REF (stmt_vinfo))
2603 stmt_vec_info pattern_stmt_info;
2604 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2605 gcc_assert (vectype != NULL_TREE);
2606 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2607 return NULL;
2608 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2609 return NULL;
2611 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2612 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2613 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2615 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2616 gimple cast_stmt
2617 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2618 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2619 rhs = rhs2;
2621 pattern_stmt
2622 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2623 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2624 bb_vinfo);
2625 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2626 STMT_VINFO_DATA_REF (pattern_stmt_info)
2627 = STMT_VINFO_DATA_REF (stmt_vinfo);
2628 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2629 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2630 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2631 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2632 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2633 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2634 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2635 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2636 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2637 *type_out = vectype;
2638 *type_in = vectype;
2639 stmts->safe_push (last_stmt);
2640 if (dump_enabled_p ())
2641 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2642 "vect_recog_bool_pattern: detected: ");
2643 return pattern_stmt;
2645 else
2646 return NULL;
2650 /* Mark statements that are involved in a pattern. */
2652 static inline void
2653 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2654 tree pattern_vectype)
2656 stmt_vec_info pattern_stmt_info, def_stmt_info;
2657 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2658 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2659 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2660 gimple def_stmt;
2662 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2663 if (pattern_stmt_info == NULL)
2665 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2666 bb_vinfo);
2667 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2669 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2671 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2672 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2673 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2674 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2675 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2676 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2677 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2678 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2679 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2681 gimple_stmt_iterator si;
2682 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2683 !gsi_end_p (si); gsi_next (&si))
2685 def_stmt = gsi_stmt (si);
2686 def_stmt_info = vinfo_for_stmt (def_stmt);
2687 if (def_stmt_info == NULL)
2689 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2690 bb_vinfo);
2691 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2693 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2694 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2695 STMT_VINFO_DEF_TYPE (def_stmt_info)
2696 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2697 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2698 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2703 /* Function vect_pattern_recog_1
2705 Input:
2706 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2707 computation pattern.
2708 STMT: A stmt from which the pattern search should start.
2710 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2711 expression that computes the same functionality and can be used to
2712 replace the sequence of stmts that are involved in the pattern.
2714 Output:
2715 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2716 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2717 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2718 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2719 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2720 to the available target pattern.
2722 This function also does some bookkeeping, as explained in the documentation
2723 for vect_recog_pattern. */
2725 static void
2726 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2727 gimple_stmt_iterator si,
2728 vec<gimple> *stmts_to_replace)
2730 gimple stmt = gsi_stmt (si), pattern_stmt;
2731 stmt_vec_info stmt_info;
2732 loop_vec_info loop_vinfo;
2733 tree pattern_vectype;
2734 tree type_in, type_out;
2735 enum tree_code code;
2736 int i;
2737 gimple next;
2739 stmts_to_replace->truncate (0);
2740 stmts_to_replace->quick_push (stmt);
2741 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2742 if (!pattern_stmt)
2743 return;
2745 stmt = stmts_to_replace->last ();
2746 stmt_info = vinfo_for_stmt (stmt);
2747 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2749 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2751 /* No need to check target support (already checked by the pattern
2752 recognition function). */
2753 pattern_vectype = type_out ? type_out : type_in;
2755 else
2757 enum machine_mode vec_mode;
2758 enum insn_code icode;
2759 optab optab;
2761 /* Check target support */
2762 type_in = get_vectype_for_scalar_type (type_in);
2763 if (!type_in)
2764 return;
2765 if (type_out)
2766 type_out = get_vectype_for_scalar_type (type_out);
2767 else
2768 type_out = type_in;
2769 if (!type_out)
2770 return;
2771 pattern_vectype = type_out;
2773 if (is_gimple_assign (pattern_stmt))
2774 code = gimple_assign_rhs_code (pattern_stmt);
2775 else
2777 gcc_assert (is_gimple_call (pattern_stmt));
2778 code = CALL_EXPR;
2781 optab = optab_for_tree_code (code, type_in, optab_default);
2782 vec_mode = TYPE_MODE (type_in);
2783 if (!optab
2784 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2785 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2786 return;
2789 /* Found a vectorizable pattern. */
2790 if (dump_enabled_p ())
2792 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2793 "pattern recognized: ");
2794 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2797 /* Mark the stmts that are involved in the pattern. */
2798 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2800 /* Patterns cannot be vectorized using SLP, because they change the order of
2801 computation. */
2802 if (loop_vinfo)
2803 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2804 if (next == stmt)
2805 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
2807 /* It is possible that additional pattern stmts are created and inserted in
2808 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2809 relevant statements. */
2810 for (i = 0; stmts_to_replace->iterate (i, &stmt)
2811 && (unsigned) i < (stmts_to_replace->length () - 1);
2812 i++)
2814 stmt_info = vinfo_for_stmt (stmt);
2815 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2816 if (dump_enabled_p ())
2818 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2819 "additional pattern stmt: ");
2820 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2823 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2828 /* Function vect_pattern_recog
2830 Input:
2831 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
2832 computation idioms.
2834 Output - for each computation idiom that is detected we create a new stmt
2835 that provides the same functionality and that can be vectorized. We
2836 also record some information in the struct_stmt_info of the relevant
2837 stmts, as explained below:
2839 At the entry to this function we have the following stmts, with the
2840 following initial value in the STMT_VINFO fields:
2842 stmt in_pattern_p related_stmt vec_stmt
2843 S1: a_i = .... - - -
2844 S2: a_2 = ..use(a_i).. - - -
2845 S3: a_1 = ..use(a_2).. - - -
2846 S4: a_0 = ..use(a_1).. - - -
2847 S5: ... = ..use(a_0).. - - -
2849 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2850 represented by a single stmt. We then:
2851 - create a new stmt S6 equivalent to the pattern (the stmt is not
2852 inserted into the code)
2853 - fill in the STMT_VINFO fields as follows:
2855 in_pattern_p related_stmt vec_stmt
2856 S1: a_i = .... - - -
2857 S2: a_2 = ..use(a_i).. - - -
2858 S3: a_1 = ..use(a_2).. - - -
2859 S4: a_0 = ..use(a_1).. true S6 -
2860 '---> S6: a_new = .... - S4 -
2861 S5: ... = ..use(a_0).. - - -
2863 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2864 to each other through the RELATED_STMT field).
2866 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2867 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2868 remain irrelevant unless used by stmts other than S4.
2870 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2871 (because they are marked as irrelevant). It will vectorize S6, and record
2872 a pointer to the new vector stmt VS6 from S6 (as usual).
2873 S4 will be skipped, and S5 will be vectorized as usual:
2875 in_pattern_p related_stmt vec_stmt
2876 S1: a_i = .... - - -
2877 S2: a_2 = ..use(a_i).. - - -
2878 S3: a_1 = ..use(a_2).. - - -
2879 > VS6: va_new = .... - - -
2880 S4: a_0 = ..use(a_1).. true S6 VS6
2881 '---> S6: a_new = .... - S4 VS6
2882 > VS5: ... = ..vuse(va_new).. - - -
2883 S5: ... = ..use(a_0).. - - -
2885 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2886 elsewhere), and we'll end up with:
2888 VS6: va_new = ....
2889 VS5: ... = ..vuse(va_new)..
2891 In case of more than one pattern statements, e.g., widen-mult with
2892 intermediate type:
2894 S1 a_t = ;
2895 S2 a_T = (TYPE) a_t;
2896 '--> S3: a_it = (interm_type) a_t;
2897 S4 prod_T = a_T * CONST;
2898 '--> S5: prod_T' = a_it w* CONST;
2900 there may be other users of a_T outside the pattern. In that case S2 will
2901 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2902 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2903 be recorded in S3. */
2905 void
2906 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2908 struct loop *loop;
2909 basic_block *bbs;
2910 unsigned int nbbs;
2911 gimple_stmt_iterator si;
2912 unsigned int i, j;
2913 vect_recog_func_ptr vect_recog_func;
2914 vec<gimple> stmts_to_replace;
2915 stmts_to_replace.create (1);
2916 gimple stmt;
2918 if (dump_enabled_p ())
2919 dump_printf_loc (MSG_NOTE, vect_location,
2920 "=== vect_pattern_recog ===");
2922 if (loop_vinfo)
2924 loop = LOOP_VINFO_LOOP (loop_vinfo);
2925 bbs = LOOP_VINFO_BBS (loop_vinfo);
2926 nbbs = loop->num_nodes;
2928 else
2930 bbs = &BB_VINFO_BB (bb_vinfo);
2931 nbbs = 1;
2934 /* Scan through the loop stmts, applying the pattern recognition
2935 functions starting at each stmt visited: */
2936 for (i = 0; i < nbbs; i++)
2938 basic_block bb = bbs[i];
2939 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2941 if (bb_vinfo && (stmt = gsi_stmt (si))
2942 && vinfo_for_stmt (stmt)
2943 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
2944 continue;
2946 /* Scan over all generic vect_recog_xxx_pattern functions. */
2947 for (j = 0; j < NUM_PATTERNS; j++)
2949 vect_recog_func = vect_vect_recog_func_ptrs[j];
2950 vect_pattern_recog_1 (vect_recog_func, si,
2951 &stmts_to_replace);
2956 stmts_to_replace.release ();