/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
					      tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (VEC (gimple, heap) **,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */
static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
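
/* Example (illustrative, not part of the original sources): in

     short s_0;
     int i_1 = (int) s_0;
     int p_2 = i_1 * i_1;

   asking about the use of i_1 in the multiplication returns true with
   *ORIG_TYPE == short and *PROMOTION == true, and *DEF_STMT set to the
   conversion statement.  */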
static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
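
/* Example (illustrative only): a source loop that this pattern matches is

     short a[N], b[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i] * b[i];

   On targets with a dot-product instruction (e.g. x86 PMADDWD) the whole
   multiply-and-accumulate sequence becomes a single DOT_PROD_EXPR.  */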
static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */
  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
						oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
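
/* Example (illustrative only): with an 8-bit a_t and a 32-bit result,

     res = (int) a_t << 12;

   the shift amount 12 does not fit the 8-bit half type, but it does fit
   a 16-bit intermediate type, so the promotion is redirected to short
   and a 16-bit to 32-bit widening shift can still be used.  */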
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       VEC (gimple, heap) **stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	       != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
	  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
	  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
	return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>  */
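
/* Example (illustrative only): a source loop that this pattern matches is

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   On targets with widening-multiply instructions this becomes a single
   WIDEN_MULT_EXPR instead of two unpacks and a full-width multiply.  */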
static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	half_type1 = half_type0;
      else
	return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)  */
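
/* Example (illustrative only): both calls below are candidates, since the
   replacements vectorize where a call to pow () would not:

     y = pow (x, 2.0);      -->  y = x * x
     z = pow (x, 0.5);      -->  z = sqrt (x), if sqrt is vectorizable
                                 for x's type on the target.  */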
static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
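
/* Example (illustrative only): a source loop that this pattern matches is

     unsigned short a[N];
     unsigned int sum = 0;
     for (i = 0; i < N; i++)
       sum += a[i];

   On targets with widening-add instructions the promotion disappears and
   pairs of narrow elements are summed directly into wider partial sums.  */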
static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
           (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              new_oprnd = make_ssa_name (interm_type, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          new_oprnd = make_ssa_name (interm_type, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
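
/* Example (illustrative only): in C the operands below are promoted to
   int, so S3/S4 are performed in 32 bits and the result demoted for the
   store:

     unsigned char a[N], b[N];
     for (i = 0; i < N; i++)
       b[i] = (a[i] & 0x0f) << 2;

   Performing the AND and the shift on a narrower type instead lets each
   vector operation process four times as many elements.  */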
static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such a
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          new_oprnd = make_ssa_name (use_type, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced
    with S4 in STMTS.  When an intermediate type is used and a pattern
    statement is created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
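
/* Example (illustrative only): a source loop that this pattern matches is

     unsigned char a[N];
     unsigned short b[N];
     for (i = 0; i < N; i++)
       b[i] = a[i] << 7;

   On targets with widening shift instructions (e.g. NEON's vshll) this
   becomes a single WIDEN_LSHIFT_EXPR.  */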
static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

  type a_t;
  TYPE b_T, c_T, res_T;

  S0 c_T = ;
  S1 a_t = (type) c_T;
  S2 b_T = ;
  S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
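
/* Example (illustrative only): a source loop that may lead to this
   pattern is

     long long a[N];
     int s[N];
     for (i = 0; i < N; i++)
       a[i] = a[i] << s[i];

   The narrower shift amount s[i] is cast to the type of a[i] in the
   pattern def sequence, so both vector operands of the shift end up
   with the same mode.  */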
static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  If N is a power-of-two constant and the
    type is signed, S1 is replaced by:
    S3  y_t = b_t < 0 ? N - 1 : 0;
    S2  x_t = b_t + y_t;
    S1' a_t = x_t >> log2 (N);

    If N is a power-of-two constant and the type is signed, S4 is
    replaced by (where the *_T temporaries have unsigned type):
    S9  y_T = b_t < 0 ? -1U : 0U;
    S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    S7  z_t = (type) z_T;
    S6  w_t = b_t + z_t;
    S5  x_t = w_t & (N - 1);
    S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
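
/* Example (illustrative only): for

     int a[N];
     for (i = 0; i < N; i++)
       a[i] = a[i] / 4;

   the division is replaced, following S3/S2/S1' above, by

     y = a[i] < 0 ? 3 : 0;
     a[i] = (a[i] + y) >> 2;

   which truncates toward zero as C requires, using only additions and
   shifts that vectorize on essentially any target.  */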
static gimple
vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
			   tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      enum machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
	return NULL;
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
	return NULL;

      /* Pattern detected.  */
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "vect_recog_divmod_pattern: detected: ");

      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
		     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
	{
	  tree var = vect_recog_temp_ssa_var (itype, NULL);
	  tree shift;
	  def_stmt
	    = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
					     fold_build2 (MINUS_EXPR, itype,
							  oprnd1,
							  build_int_cst (itype,
									 1)),
					     build_int_cst (itype, 0));
	  new_pattern_def_seq (stmt_vinfo, def_stmt);
	  var = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
					    gimple_assign_lhs (def_stmt));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  shift = build_int_cst (itype, tree_log2 (oprnd1));
	  pattern_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    var, shift);
	}
      else
	{
	  tree signmask;
	  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
	  if (compare_tree_int (oprnd1, 2) == 0)
	    {
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
						 build_int_cst (itype, 1),
						 build_int_cst (itype, 0));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    {
	      tree utype
		= build_nonstandard_integer_type (prec, 1);
	      tree vecutype = get_vectype_for_scalar_type (utype);
	      tree shift
		= build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
					- tree_log2 (oprnd1));
	      tree var = vect_recog_temp_ssa_var (utype, NULL);

	      def_stmt
		= gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
						 build_int_cst (utype, -1),
						 build_int_cst (utype, 0));
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      var = vect_recog_temp_ssa_var (utype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, var,
						gimple_assign_lhs (def_stmt),
						shift);
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
						NULL_TREE);
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    oprnd0, signmask);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  def_stmt
	    = gimple_build_assign_with_ops (BIT_AND_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    gimple_assign_lhs (def_stmt),
					    fold_build2 (MINUS_EXPR, itype,
							 oprnd1,
							 build_int_cst (itype,
									1)));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  pattern_stmt
	    = gimple_build_assign_with_ops (MINUS_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    gimple_assign_lhs (def_stmt),
					    signmask);
	}

      if (vect_print_dump_info (REPORT_DETAILS))
	print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

      VEC_safe_push (gimple, heap, *stmts, last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }

  if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
      || integer_zerop (oprnd1)
      || prec > HOST_BITS_PER_WIDE_INT)
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
				 & GET_MODE_MASK (TYPE_MODE (itype));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
	return NULL;

      /* Find a suitable multiplier and right shift count
	 instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
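
      /* Worked example (illustrative, not from the original sources):
	 for 32-bit unsigned division by 3, choose_multiplier returns
	 ml = 0xaaaaaaab with mh == 0 and post_shift == 1, so the
	 quotient below is computed as

	   q = (oprnd0 h* 0xaaaaaaab) >> 1

	 i.e. floor (oprnd0 * 0xaaaaaaab / 2^33), which equals
	 oprnd0 / 3 for every 32-bit oprnd0.  */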
1820 /* If the suggested multiplier is more than SIZE bits, we can do better
1821 for even divisors, using an initial right shift. */
1822 if (mh != 0 && (d & 1) == 0)
1824 pre_shift = floor_log2 (d & -d);
1825 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
1826 &ml, &post_shift, &dummy_int);
1827 gcc_assert (!mh);
1829 else
1830 pre_shift = 0;
1832 if (mh != 0)
1834 if (post_shift - 1 >= prec)
1835 return NULL;
1837 /* t1 = oprnd0 h* ml;
1838 t2 = oprnd0 - t1;
1839 t3 = t2 >> 1;
1840 t4 = t1 + t3;
1841 q = t4 >> (post_shift - 1); */
1842 t1 = vect_recog_temp_ssa_var (itype, NULL);
1843 def_stmt
1844 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
1845 build_int_cst (itype, ml));
1846 append_pattern_def_seq (stmt_vinfo, def_stmt);
1848 t2 = vect_recog_temp_ssa_var (itype, NULL);
1849 def_stmt
1850 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
1851 append_pattern_def_seq (stmt_vinfo, def_stmt);
1853 t3 = vect_recog_temp_ssa_var (itype, NULL);
1854 def_stmt
1855 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
1856 integer_one_node);
1857 append_pattern_def_seq (stmt_vinfo, def_stmt);
1859 t4 = vect_recog_temp_ssa_var (itype, NULL);
1860 def_stmt
1861 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
1863 if (post_shift != 1)
1865 append_pattern_def_seq (stmt_vinfo, def_stmt);
1867 q = vect_recog_temp_ssa_var (itype, NULL);
1868 pattern_stmt
1869 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
1870 build_int_cst (itype,
1871 post_shift
1872 - 1));
1874 else
1876 q = t4;
1877 pattern_stmt = def_stmt;
1880 else
1882 if (pre_shift >= prec || post_shift >= prec)
1883 return NULL;
1885 /* t1 = oprnd0 >> pre_shift;
1886 t2 = t1 h* ml;
1887 q = t2 >> post_shift; */
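/* Worked example (constants shown for illustration only): with
   prec == 32 and D == 3 we get mh == 0, pre_shift == 0,
   ml == 0xAAAAAAAB and post_shift == 1, i.e. in scalar C:

     t2 = ((unsigned long long) n * 0xAAAAAAAB) >> 32;
     q  = t2 >> 1;			(n / 3)

   e.g. n == 10: t2 == 6 and q == 3.  */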
1888 if (pre_shift)
1890 t1 = vect_recog_temp_ssa_var (itype, NULL);
1891 def_stmt
1892 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
1893 build_int_cst (NULL,
1894 pre_shift));
1895 append_pattern_def_seq (stmt_vinfo, def_stmt);
1897 else
1898 t1 = oprnd0;
1900 t2 = vect_recog_temp_ssa_var (itype, NULL);
1901 def_stmt
1902 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
1903 build_int_cst (itype, ml));
1905 if (post_shift)
1907 append_pattern_def_seq (stmt_vinfo, def_stmt);
1909 q = vect_recog_temp_ssa_var (itype, NULL);
1910 def_stmt
1911 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
1912 build_int_cst (itype,
1913 post_shift));
1915 else
1916 q = t2;
1918 pattern_stmt = def_stmt;
1921 else
1923 unsigned HOST_WIDE_INT ml;
1924 int post_shift;
1925 HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
1926 unsigned HOST_WIDE_INT abs_d;
1927 bool add = false;
1928 tree t1, t2, t3, t4;
1930 /* Give up for -1. */
1931 if (d == -1)
1932 return NULL;
1934 /* Since d might be INT_MIN, we have to cast to
1935 unsigned HOST_WIDE_INT before negating to avoid
1936 undefined signed overflow. */
1937 abs_d = (d >= 0
1938 ? (unsigned HOST_WIDE_INT) d
1939 : - (unsigned HOST_WIDE_INT) d);
1941 /* n rem d = n rem -d */
1942 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
1944 d = abs_d;
1945 oprnd1 = build_int_cst (itype, abs_d);
1947 else if (HOST_BITS_PER_WIDE_INT >= prec
1948 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
1949 /* This case is not handled correctly below. */
1950 return NULL;
1952 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
1953 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
1955 add = true;
1956 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
1958 if (post_shift >= prec)
1959 return NULL;
1961 /* t1 = oprnd0 h* ml; */
1962 t1 = vect_recog_temp_ssa_var (itype, NULL);
1963 def_stmt
1964 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
1965 build_int_cst (itype, ml));
1966 append_pattern_def_seq (stmt_vinfo, def_stmt);
1968 if (add)
1970 /* t2 = t1 + oprnd0; */
1971 t2 = vect_recog_temp_ssa_var (itype, NULL);
1972 def_stmt
1973 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
1974 append_pattern_def_seq (stmt_vinfo, def_stmt);
1976 else
1977 t2 = t1;
1979 if (post_shift)
1981 /* t3 = t2 >> post_shift; */
1982 t3 = vect_recog_temp_ssa_var (itype, NULL);
1983 def_stmt
1984 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
1985 build_int_cst (itype, post_shift));
1986 append_pattern_def_seq (stmt_vinfo, def_stmt);
1988 else
1989 t3 = t2;
1991 /* t4 = oprnd0 >> (prec - 1); */
1992 t4 = vect_recog_temp_ssa_var (itype, NULL);
1993 def_stmt
1994 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
1995 build_int_cst (itype, prec - 1));
1996 append_pattern_def_seq (stmt_vinfo, def_stmt);
1998 /* q = t3 - t4; or q = t4 - t3; */
1999 q = vect_recog_temp_ssa_var (itype, NULL);
2000 pattern_stmt
2001 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2002 d < 0 ? t3 : t4);
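/* Worked example (constants shown for illustration only): with
   prec == 32 and D == 3, choose_multiplier gives ml == 0x55555556
   and post_shift == 0, so ADD stays false and, in scalar C:

     t1 = ((long long) n * 0x55555556) >> 32;	(t3 == t1 here)
     t4 = n >> 31;
     q  = t1 - t4;			(n / 3)

   e.g. n == -7: t1 == -3, t4 == -1, q == -2.  */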
2005 if (rhs_code == TRUNC_MOD_EXPR)
2007 tree r, t1;
2009 /* We divided. Now finish by:
2010 t1 = q * oprnd1;
2011 r = oprnd0 - t1; */
2012 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2014 t1 = vect_recog_temp_ssa_var (itype, NULL);
2015 def_stmt
2016 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2017 append_pattern_def_seq (stmt_vinfo, def_stmt);
2019 r = vect_recog_temp_ssa_var (itype, NULL);
2020 pattern_stmt
2021 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
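/* I.e. the remainder is recovered from the quotient as
   r = n - (n / d) * d; e.g. n == -7, d == 3 gives q == -2 and
   r == -7 - (-6) == -1, matching C's truncating %.  */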
2024 /* Pattern detected. */
2025 if (vect_print_dump_info (REPORT_DETAILS))
2026 fprintf (vect_dump, "vect_recog_divmod_pattern: detected: ");
2028 if (vect_print_dump_info (REPORT_DETAILS))
2029 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2031 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2033 *type_in = vectype;
2034 *type_out = vectype;
2035 return pattern_stmt;
2038 /* Function vect_recog_mixed_size_cond_pattern
2040 Try to find the following pattern:
2042 type x_t, y_t;
2043 TYPE a_T, b_T, c_T;
2044 loop:
2045 S1 a_T = x_t CMP y_t ? b_T : c_T;
2047 where type 'TYPE' is an integral type which has a different size
2048 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2049 than 'type', the constants need to fit into an integer type
2050 with the same width as 'type') or results of a conversion from 'type'.
2052 Input:
2054 * LAST_STMT: A stmt from which the pattern search begins.
2056 Output:
2058 * TYPE_IN: The type of the input arguments to the pattern.
2060 * TYPE_OUT: The type of the output of this pattern.
2062 * Return value: A new stmt that will be used to replace the pattern.
2063 Additionally a def_stmt is added:
2065 a_it = x_t CMP y_t ? b_it : c_it;
2066 a_T = (TYPE) a_it; */
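/* A hypothetical source-level instance of the pattern above, with
   16-bit 'type' and 32-bit 'TYPE' (names for illustration only):

     short x[N], y[N];
     int r[N];
     for (i = 0; i < N; i++)
       r[i] = x[i] < y[i] ? 17 : 42;

   The comparison is done on shorts while the result is an int, so the
   COND_EXPR is rewritten on a 16-bit intermediate type and then
   widened, as in the a_it/a_T stmts above.  */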
2068 static gimple
2069 vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2070 tree *type_out)
2072 gimple last_stmt = VEC_index (gimple, *stmts, 0);
2073 tree cond_expr, then_clause, else_clause;
2074 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2075 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2076 enum machine_mode cmpmode;
2077 gimple pattern_stmt, def_stmt;
2078 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2079 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2080 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2081 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2082 bool promotion;
2083 tree comp_scalar_type;
2085 if (!is_gimple_assign (last_stmt)
2086 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2087 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2088 return NULL;
2090 cond_expr = gimple_assign_rhs1 (last_stmt);
2091 then_clause = gimple_assign_rhs2 (last_stmt);
2092 else_clause = gimple_assign_rhs3 (last_stmt);
2094 if (!COMPARISON_CLASS_P (cond_expr))
2095 return NULL;
2097 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2098 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2099 if (comp_vectype == NULL_TREE)
2100 return NULL;
2102 type = gimple_expr_type (last_stmt);
2103 if (types_compatible_p (type, comp_scalar_type)
2104 || ((TREE_CODE (then_clause) != INTEGER_CST
2105 || TREE_CODE (else_clause) != INTEGER_CST)
2106 && !INTEGRAL_TYPE_P (comp_scalar_type))
2107 || !INTEGRAL_TYPE_P (type))
2108 return NULL;
2110 if ((TREE_CODE (then_clause) != INTEGER_CST
2111 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2112 &def_stmt0, &promotion))
2113 || (TREE_CODE (else_clause) != INTEGER_CST
2114 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2115 &def_stmt1, &promotion)))
2116 return NULL;
2118 if (orig_type0 && orig_type1
2119 && !types_compatible_p (orig_type0, orig_type1))
2120 return NULL;
2122 if (orig_type0)
2124 if (!types_compatible_p (orig_type0, comp_scalar_type))
2125 return NULL;
2126 then_clause = gimple_assign_rhs1 (def_stmt0);
2127 itype = orig_type0;
2130 if (orig_type1)
2132 if (!types_compatible_p (orig_type1, comp_scalar_type))
2133 return NULL;
2134 else_clause = gimple_assign_rhs1 (def_stmt1);
2135 itype = orig_type1;
2138 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2140 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2141 return NULL;
2143 vectype = get_vectype_for_scalar_type (type);
2144 if (vectype == NULL_TREE)
2145 return NULL;
2147 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2148 return NULL;
2150 if (itype == NULL_TREE)
2151 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2152 TYPE_UNSIGNED (type));
2154 if (itype == NULL_TREE
2155 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2156 return NULL;
2158 vecitype = get_vectype_for_scalar_type (itype);
2159 if (vecitype == NULL_TREE)
2160 return NULL;
2162 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2163 return NULL;
2165 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2167 if ((TREE_CODE (then_clause) == INTEGER_CST
2168 && !int_fits_type_p (then_clause, itype))
2169 || (TREE_CODE (else_clause) == INTEGER_CST
2170 && !int_fits_type_p (else_clause, itype)))
2171 return NULL;
2174 def_stmt
2175 = gimple_build_assign_with_ops3 (COND_EXPR,
2176 vect_recog_temp_ssa_var (itype, NULL),
2177 unshare_expr (cond_expr),
2178 fold_convert (itype, then_clause),
2179 fold_convert (itype, else_clause));
2180 pattern_stmt
2181 = gimple_build_assign_with_ops (NOP_EXPR,
2182 vect_recog_temp_ssa_var (type, NULL),
2183 gimple_assign_lhs (def_stmt), NULL_TREE);
2185 new_pattern_def_seq (stmt_vinfo, def_stmt);
2186 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2187 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2188 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2189 *type_in = vecitype;
2190 *type_out = vectype;
2192 if (vect_print_dump_info (REPORT_DETAILS))
2193 fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: ");
2195 return pattern_stmt;
2199 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
2200 true if bool VAR can be optimized as described for that pattern. */
2202 static bool
2203 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2205 gimple def_stmt;
2206 enum vect_def_type dt;
2207 tree def, rhs1;
2208 enum tree_code rhs_code;
2210 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2211 &dt))
2212 return false;
2214 if (dt != vect_internal_def)
2215 return false;
2217 if (!is_gimple_assign (def_stmt))
2218 return false;
2220 if (!has_single_use (def))
2221 return false;
2223 rhs1 = gimple_assign_rhs1 (def_stmt);
2224 rhs_code = gimple_assign_rhs_code (def_stmt);
2225 switch (rhs_code)
2227 case SSA_NAME:
2228 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2230 CASE_CONVERT:
2231 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2232 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2233 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2234 return false;
2235 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2237 case BIT_NOT_EXPR:
2238 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2240 case BIT_AND_EXPR:
2241 case BIT_IOR_EXPR:
2242 case BIT_XOR_EXPR:
2243 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2244 return false;
2245 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2246 bb_vinfo);
2248 default:
2249 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2251 tree vecitype, comp_vectype;
2253 /* If the comparison can throw, then is_gimple_condexpr will be
2254 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2255 if (stmt_could_throw_p (def_stmt))
2256 return false;
2258 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2259 if (comp_vectype == NULL_TREE)
2260 return false;
2262 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2264 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2265 tree itype
2266 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2267 vecitype = get_vectype_for_scalar_type (itype);
2268 if (vecitype == NULL_TREE)
2269 return false;
2271 else
2272 vecitype = comp_vectype;
2273 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2275 return false;
2280 /* Helper function of adjust_bool_pattern. Add a cast to TYPE of the result
2281 of a previous stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from
2282 its RELATED_STMT to PATTERN_DEF_SEQ and recording the new cast as RELATED_STMT. */
2284 static tree
2285 adjust_bool_pattern_cast (tree type, tree var)
2287 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2288 gimple cast_stmt, pattern_stmt;
2290 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2291 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2292 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2293 cast_stmt
2294 = gimple_build_assign_with_ops (NOP_EXPR,
2295 vect_recog_temp_ssa_var (type, NULL),
2296 gimple_assign_lhs (pattern_stmt),
2297 NULL_TREE);
2298 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2299 return gimple_assign_lhs (cast_stmt);
2303 /* Helper function of vect_recog_bool_pattern. Do the actual transformations
2304 recursively. VAR is an SSA_NAME that should be transformed from bool
2305 to a wider integer type. OUT_TYPE is the desired final integer type of
2306 the whole pattern. TRUEVAL should be NULL_TREE unless we are optimizing a
2307 BIT_AND_EXPR into a COND_EXPR that uses one of the operands as the integer
2308 then_clause. STMTS is where statements with added pattern stmts
2309 should be pushed to. */
2311 static tree
2312 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2313 VEC (gimple, heap) **stmts)
2315 gimple stmt = SSA_NAME_DEF_STMT (var);
2316 enum tree_code rhs_code, def_rhs_code;
2317 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2318 location_t loc;
2319 gimple pattern_stmt, def_stmt;
2321 rhs1 = gimple_assign_rhs1 (stmt);
2322 rhs2 = gimple_assign_rhs2 (stmt);
2323 rhs_code = gimple_assign_rhs_code (stmt);
2324 loc = gimple_location (stmt);
2325 switch (rhs_code)
2327 case SSA_NAME:
2328 CASE_CONVERT:
2329 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2330 itype = TREE_TYPE (irhs1);
2331 pattern_stmt
2332 = gimple_build_assign_with_ops (SSA_NAME,
2333 vect_recog_temp_ssa_var (itype, NULL),
2334 irhs1, NULL_TREE);
2335 break;
2337 case BIT_NOT_EXPR:
2338 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2339 itype = TREE_TYPE (irhs1);
2340 pattern_stmt
2341 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2342 vect_recog_temp_ssa_var (itype, NULL),
2343 irhs1, build_int_cst (itype, 1));
2344 break;
2346 case BIT_AND_EXPR:
2347 /* Try to optimize x = y & (a < b ? 1 : 0); into
2348 x = (a < b ? y : 0);
2350 E.g. for:
2351 bool a_b, b_b, c_b;
2352 TYPE d_T;
2354 S1 a_b = x1 CMP1 y1;
2355 S2 b_b = x2 CMP2 y2;
2356 S3 c_b = a_b & b_b;
2357 S4 d_T = (TYPE) c_b;
2359 we would normally emit:
2361 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2362 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2363 S3' c_T = a_T & b_T;
2364 S4' d_T = c_T;
2366 but we can save one stmt by using the
2367 result of one of the COND_EXPRs in the other COND_EXPR and leave
2368 BIT_AND_EXPR stmt out:
2370 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2371 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2372 S4' d_T = c_T;
2374 At least when VEC_COND_EXPR is implemented using masks,
2375 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
2376 computes the comparison mask and ANDs it, in one case with an
2377 all-ones vector, in the other case with a vector register.
2378 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2379 often more expensive. */
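/* In mask terms (an illustrative sketch, assuming a mask-based
   VEC_COND_EXPR implementation):

     x2 CMP2 y2 ? 1 : 0    is  mask2 & {1,...,1}
     x2 CMP2 y2 ? a_T : 0  is  mask2 & a_T

   so folding the BIT_AND_EXPR into the COND_EXPR costs no extra
   vector AND.  */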
2380 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2381 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2382 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2384 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2385 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2386 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2387 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2389 gimple tstmt;
2390 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2391 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2392 tstmt = VEC_pop (gimple, *stmts);
2393 gcc_assert (tstmt == def_stmt);
2394 VEC_quick_push (gimple, *stmts, stmt);
2395 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2396 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2397 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2398 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2399 return irhs2;
2401 else
2402 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2403 goto and_ior_xor;
2405 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2406 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2407 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2409 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2410 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2411 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2412 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2414 gimple tstmt;
2415 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2416 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2417 tstmt = VEC_pop (gimple, *stmts);
2418 gcc_assert (tstmt == def_stmt);
2419 VEC_quick_push (gimple, *stmts, stmt);
2420 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2421 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2422 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2423 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2424 return irhs1;
2426 else
2427 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2428 goto and_ior_xor;
2430 /* FALLTHRU */
2431 case BIT_IOR_EXPR:
2432 case BIT_XOR_EXPR:
2433 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2434 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2435 and_ior_xor:
2436 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2437 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2439 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2440 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2441 int out_prec = TYPE_PRECISION (out_type);
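/* Cast the operand whose precision is farther from OUT_TYPE's to the
   other operand's type; on a tie, cast both to OUT_TYPE.
   E.g. (illustration only) prec1 == 8, prec2 == 32, out_prec == 32
   casts irhs1 to the 32-bit type.  */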
2442 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2443 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2444 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2445 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2446 else
2448 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2449 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2452 itype = TREE_TYPE (irhs1);
2453 pattern_stmt
2454 = gimple_build_assign_with_ops (rhs_code,
2455 vect_recog_temp_ssa_var (itype, NULL),
2456 irhs1, irhs2);
2457 break;
2459 default:
2460 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2461 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2462 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2463 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2464 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2466 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2467 itype
2468 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2470 else
2471 itype = TREE_TYPE (rhs1);
2472 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2473 if (trueval == NULL_TREE)
2474 trueval = build_int_cst (itype, 1);
2475 else
2476 gcc_checking_assert (useless_type_conversion_p (itype,
2477 TREE_TYPE (trueval)));
2478 pattern_stmt
2479 = gimple_build_assign_with_ops3 (COND_EXPR,
2480 vect_recog_temp_ssa_var (itype, NULL),
2481 cond_expr, trueval,
2482 build_int_cst (itype, 0));
2483 break;
2486 VEC_safe_push (gimple, heap, *stmts, stmt);
2487 gimple_set_location (pattern_stmt, loc);
2488 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2489 return gimple_assign_lhs (pattern_stmt);
2493 /* Function vect_recog_bool_pattern
2495 Try to find a pattern like the following:
2497 bool a_b, b_b, c_b, d_b, e_b;
2498 TYPE f_T;
2499 loop:
2500 S1 a_b = x1 CMP1 y1;
2501 S2 b_b = x2 CMP2 y2;
2502 S3 c_b = a_b & b_b;
2503 S4 d_b = x3 CMP3 y3;
2504 S5 e_b = c_b | d_b;
2505 S6 f_T = (TYPE) e_b;
2507 where type 'TYPE' is an integral type.
2509 Input:
2511 * LAST_STMT: A stmt at the end of the pattern, from which the
2512 search begins, i.e. a cast of a bool to
2513 an integer type.
2515 Output:
2517 * TYPE_IN: The type of the input arguments to the pattern.
2519 * TYPE_OUT: The type of the output of this pattern.
2521 * Return value: A new stmt that will be used to replace the pattern.
2523 Assuming the size of TYPE is the same as the size of all comparisons
2524 (otherwise some casts would be added where needed), for the above
2525 sequence we create the following related pattern stmts:
2526 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2527 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2528 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2529 S5' e_T = c_T | d_T;
2530 S6' f_T = e_T;
2532 Instead of the above S3' we could emit:
2533 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2534 S3' c_T = a_T & b_T;
2535 but the above is more efficient. */
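/* A hypothetical source-level instance of the pattern above (assuming
   earlier passes have lowered the && and || of the comparisons to
   BIT_AND_EXPR/BIT_IOR_EXPR on bools):

     int out[N];
     for (i = 0; i < N; i++)
       out[i] = (p[i] < q[i] && r[i] < s[i]) || t[i] < u[i];

   which matches S1-S6 with TYPE == int.  */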
2537 static gimple
2538 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2539 tree *type_out)
2541 gimple last_stmt = VEC_pop (gimple, *stmts);
2542 enum tree_code rhs_code;
2543 tree var, lhs, rhs, vectype;
2544 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2545 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2546 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2547 gimple pattern_stmt;
2549 if (!is_gimple_assign (last_stmt))
2550 return NULL;
2552 var = gimple_assign_rhs1 (last_stmt);
2553 lhs = gimple_assign_lhs (last_stmt);
2555 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2556 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2557 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2558 return NULL;
2560 rhs_code = gimple_assign_rhs_code (last_stmt);
2561 if (CONVERT_EXPR_CODE_P (rhs_code))
2563 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2564 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2565 return NULL;
2566 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2567 if (vectype == NULL_TREE)
2568 return NULL;
2570 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2571 return NULL;
2573 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2574 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2575 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2576 pattern_stmt
2577 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2578 else
2579 pattern_stmt
2580 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2581 *type_out = vectype;
2582 *type_in = vectype;
2583 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2584 if (vect_print_dump_info (REPORT_DETAILS))
2585 fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
2587 return pattern_stmt;
2589 else if (rhs_code == SSA_NAME
2590 && STMT_VINFO_DATA_REF (stmt_vinfo))
2592 stmt_vec_info pattern_stmt_info;
2593 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2594 gcc_assert (vectype != NULL_TREE);
2595 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2596 return NULL;
2597 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2598 return NULL;
2600 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2601 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2602 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2604 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2605 gimple cast_stmt
2606 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2607 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2608 rhs = rhs2;
2610 pattern_stmt
2611 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2612 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2613 bb_vinfo);
2614 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2615 STMT_VINFO_DATA_REF (pattern_stmt_info)
2616 = STMT_VINFO_DATA_REF (stmt_vinfo);
2617 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2618 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2619 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2620 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2621 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2622 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2623 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2624 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2625 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2626 *type_out = vectype;
2627 *type_in = vectype;
2628 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2629 if (vect_print_dump_info (REPORT_DETAILS))
2630 fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
2631 return pattern_stmt;
2633 else
2634 return NULL;
2638 /* Mark statements that are involved in a pattern. */
2640 static inline void
2641 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2642 tree pattern_vectype)
2644 stmt_vec_info pattern_stmt_info, def_stmt_info;
2645 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2646 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2647 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2648 gimple def_stmt;
2650 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2651 if (pattern_stmt_info == NULL)
2653 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2654 bb_vinfo);
2655 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2657 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2659 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2660 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2661 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2662 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2663 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2664 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2665 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2666 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2667 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2669 gimple_stmt_iterator si;
2670 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2671 !gsi_end_p (si); gsi_next (&si))
2673 def_stmt = gsi_stmt (si);
2674 def_stmt_info = vinfo_for_stmt (def_stmt);
2675 if (def_stmt_info == NULL)
2677 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2678 bb_vinfo);
2679 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2681 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2682 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2683 STMT_VINFO_DEF_TYPE (def_stmt_info)
2684 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2685 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2686 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2691 /* Function vect_pattern_recog_1
2693 Input:
2694 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2695 computation pattern.
2696 STMT: A stmt from which the pattern search should start.
2698 If PATTERN_RECOG_FUNC successfully detects the pattern, it creates an
2699 expression that computes the same functionality and can be used to
2700 replace the sequence of stmts that are involved in the pattern.
2702 Output:
2703 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2704 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2705 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2706 indicates that target support has already been checked by PATTERN_RECOG_FUNC.
2707 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2708 the available target pattern.
2710 This function also does some bookkeeping, as explained in the documentation
2711 for vect_pattern_recog. */
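/* A minimal sketch of the recog-function contract (the pattern name
   and body here are hypothetical, for illustration only):

     static gimple
     vect_recog_foo_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			     tree *type_out)
     {
       gimple last_stmt = VEC_index (gimple, *stmts, 0);
       if (!is_gimple_assign (last_stmt))
	 return NULL;		(no match)
       ... build the replacement stmt; set *type_in and *type_out
       either to scalar types, in which case target support is checked
       below, or to vector types, meaning the recog function has
       already checked support itself; return the stmt ...
     }  */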
2713 static void
2714 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2715 gimple_stmt_iterator si,
2716 VEC (gimple, heap) **stmts_to_replace)
2718 gimple stmt = gsi_stmt (si), pattern_stmt;
2719 stmt_vec_info stmt_info;
2720 loop_vec_info loop_vinfo;
2721 tree pattern_vectype;
2722 tree type_in, type_out;
2723 enum tree_code code;
2724 int i;
2725 gimple next;
2727 VEC_truncate (gimple, *stmts_to_replace, 0);
2728 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2729 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2730 if (!pattern_stmt)
2731 return;
2733 stmt = VEC_last (gimple, *stmts_to_replace);
2734 stmt_info = vinfo_for_stmt (stmt);
2735 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2737 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2739 /* No need to check target support (already checked by the pattern
2740 recognition function). */
2741 pattern_vectype = type_out ? type_out : type_in;
2743 else
2745 enum machine_mode vec_mode;
2746 enum insn_code icode;
2747 optab optab;
2749 /* Check target support */
2750 type_in = get_vectype_for_scalar_type (type_in);
2751 if (!type_in)
2752 return;
2753 if (type_out)
2754 type_out = get_vectype_for_scalar_type (type_out);
2755 else
2756 type_out = type_in;
2757 if (!type_out)
2758 return;
2759 pattern_vectype = type_out;
2761 if (is_gimple_assign (pattern_stmt))
2762 code = gimple_assign_rhs_code (pattern_stmt);
2763 else
2765 gcc_assert (is_gimple_call (pattern_stmt));
2766 code = CALL_EXPR;
2769 optab = optab_for_tree_code (code, type_in, optab_default);
2770 vec_mode = TYPE_MODE (type_in);
2771 if (!optab
2772 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2773 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2774 return;
2777 /* Found a vectorizable pattern. */
2778 if (vect_print_dump_info (REPORT_DETAILS))
2780 fprintf (vect_dump, "pattern recognized: ");
2781 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2784 /* Mark the stmts that are involved in the pattern. */
2785 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2787 /* Patterns cannot be vectorized using SLP, because they change the order of
2788 computation. */
2789 if (loop_vinfo)
2790 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2791 if (next == stmt)
2792 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2794 /* It is possible that additional pattern stmts are created and inserted in
2795 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2796 relevant statements. */
2797 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2798 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2799 i++)
2801 stmt_info = vinfo_for_stmt (stmt);
2802 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2803 if (vect_print_dump_info (REPORT_DETAILS))
2805 fprintf (vect_dump, "additional pattern stmt: ");
2806 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2809 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2814 /* Function vect_pattern_recog
2816 Input:
2817 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
2818 computation idioms.
2820 Output - for each computation idiom that is detected we create a new stmt
2821 that provides the same functionality and that can be vectorized. We
2822 also record some information in the stmt_vec_info of the relevant
2823 stmts, as explained below:
2825 At the entry to this function we have the following stmts, with the
2826 following initial value in the STMT_VINFO fields:
2828 stmt in_pattern_p related_stmt vec_stmt
2829 S1: a_i = .... - - -
2830 S2: a_2 = ..use(a_i).. - - -
2831 S3: a_1 = ..use(a_2).. - - -
2832 S4: a_0 = ..use(a_1).. - - -
2833 S5: ... = ..use(a_0).. - - -
2835 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2836 represented by a single stmt. We then:
2837 - create a new stmt S6 equivalent to the pattern (the stmt is not
2838 inserted into the code)
2839 - fill in the STMT_VINFO fields as follows:
2841 in_pattern_p related_stmt vec_stmt
2842 S1: a_i = .... - - -
2843 S2: a_2 = ..use(a_i).. - - -
2844 S3: a_1 = ..use(a_2).. - - -
2845 S4: a_0 = ..use(a_1).. true S6 -
2846 '---> S6: a_new = .... - S4 -
2847 S5: ... = ..use(a_0).. - - -
2849 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2850 to each other through the RELATED_STMT field).
2852 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2853 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2854 remain irrelevant unless used by stmts other than S4.
2856 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2857 (because they are marked as irrelevant). It will vectorize S6, and record
2858 a pointer to the new vector stmt VS6 from S6 (as usual).
2859 S4 will be skipped, and S5 will be vectorized as usual:
2861 in_pattern_p related_stmt vec_stmt
2862 S1: a_i = .... - - -
2863 S2: a_2 = ..use(a_i).. - - -
2864 S3: a_1 = ..use(a_2).. - - -
2865 > VS6: va_new = .... - - -
2866 S4: a_0 = ..use(a_1).. true S6 VS6
2867 '---> S6: a_new = .... - S4 VS6
2868 > VS5: ... = ..vuse(va_new).. - - -
2869 S5: ... = ..use(a_0).. - - -
2871 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2872 elsewhere), and we'll end up with:
2874 VS6: va_new = ....
2875 VS5: ... = ..vuse(va_new)..
2877 In case of more than one pattern statements, e.g., widen-mult with
2878 intermediate type:
2880 S1 a_t = ;
2881 S2 a_T = (TYPE) a_t;
2882 '--> S3: a_it = (interm_type) a_t;
2883 S4 prod_T = a_T * CONST;
2884 '--> S5: prod_T' = a_it w* CONST;
2886 there may be other users of a_T outside the pattern. In that case S2 will
2887 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2888 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2889 be recorded in S3. */
2891 void
2892 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2894 struct loop *loop;
2895 basic_block *bbs;
2896 unsigned int nbbs;
2897 gimple_stmt_iterator si;
2898 unsigned int i, j;
2899 vect_recog_func_ptr vect_recog_func;
2900 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2901 gimple stmt;
2903 if (vect_print_dump_info (REPORT_DETAILS))
2904 fprintf (vect_dump, "=== vect_pattern_recog ===");
2906 if (loop_vinfo)
2908 loop = LOOP_VINFO_LOOP (loop_vinfo);
2909 bbs = LOOP_VINFO_BBS (loop_vinfo);
2910 nbbs = loop->num_nodes;
2912 else
2914 bbs = &BB_VINFO_BB (bb_vinfo);
2915 nbbs = 1;
2918 /* Scan through the stmts in each basic block, applying the pattern
2919 recognition functions starting at each stmt visited: */
2920 for (i = 0; i < nbbs; i++)
2922 basic_block bb = bbs[i];
2923 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2925 if (bb_vinfo && (stmt = gsi_stmt (si))
2926 && vinfo_for_stmt (stmt)
2927 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
2928 continue;
2930 /* Scan over all generic vect_recog_xxx_pattern functions. */
2931 for (j = 0; j < NUM_PATTERNS; j++)
2933 vect_recog_func = vect_vect_recog_func_ptrs[j];
2934 vect_pattern_recog_1 (vect_recog_func, si,
2935 &stmts_to_replace);
2940 VEC_free (gimple, heap, stmts_to_replace);