gcc/tree-vect-patterns.c
/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"
#include "diagnostic-core.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                              tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
                                               tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);

static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
        vect_recog_widen_mult_pattern,
        vect_recog_widen_sum_pattern,
        vect_recog_dot_prod_pattern,
        vect_recog_pow_pattern,
        vect_recog_over_widening_pattern,
        vect_recog_widen_shift_pattern,
        vect_recog_vector_vector_shift_pattern,
        vect_recog_sdivmod_pow2_pattern,
        vect_recog_mixed_size_cond_pattern,
        vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
                   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
                           &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
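
/* For illustration: assuming a target that provides a vector dot-product
   operation (an expander for DOT_PROD_EXPR), a C loop like

     short a[N], b[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += (int) a[i] * (int) b[i];

   contains exactly the S3-S7 sequence above, with 'type' = short and
   TYPE1 = TYPE2 = int; the multiply-convert-accumulate body is replaced
   by a single DOT_PROD_EXPR <a[i], b[i], sum_0>.  */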
static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
          && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
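
/* For illustration: with a char a_t promoted to an int a_T, the statement
   prod_T = a_T * 300 cannot use a char widening multiply (300 does not fit
   char), but since 300 fits short and int is 4 times wider than char, the
   promotion is rewritten as a_it = (short) a_t and the multiply becomes a
   short->int widening operation.  This is a sketch of the case described
   above; the concrete types depend on the target.  */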
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               VEC (gimple, heap) **stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;
  loop_vec_info loop_vinfo;
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo;
  stmt_vec_info stmt_vinfo;

  stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
              != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
      || !gimple_bb (def_stmt)
      || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
      || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_vinfo)
          && gimple_code (def_stmt) != GIMPLE_PHI)
      || !vinfo_for_stmt (def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
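
/* For illustration: assuming the target supports a widening multiply
   (e.g. vec_widen_smult_hi/lo patterns), a loop like

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = (int) a[i] * (int) b[i];

   matches S3-S5 above and is rewritten to WIDEN_MULT_EXPR <a[i], b[i]>,
   keeping twice as many elements per vector as a plain int multiply.  */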
static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;
  bool promotion;
  loop_vec_info loop_vinfo;
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo;
  stmt_vec_info stmt_vinfo;

  stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      gimple use_stmt = NULL;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      if (!gimple_bb (use_stmt)
          || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
          || (!loop && gimple_bb (use_stmt) != BB_VINFO_BB (bb_vinfo)))
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
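
/* For illustration: a call pow (x, 2.0) in a loop body matches the squaring
   case below and is replaced by x * x, while pow (x, 0.5) is replaced by a
   call to sqrt when the target can vectorize sqrt; both replace a libm call
   that the vectorizer could not handle otherwise.  */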
static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
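
/* For illustration: summing narrow elements into a wide accumulator, as in

     unsigned char x[N];
     unsigned int sum = 0;
     for (i = 0; i < N; i++)
       sum += (unsigned int) x[i];

   matches S2-S3 above; the cast-and-add pair becomes a single
   WIDEN_SUM_EXPR <x[i], sum_0>.  */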
static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = NULL;
  bool promotion;

  if (loop_info)
    loop = LOOP_VINFO_LOOP (loop_info);

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
                              &promotion)
          || !promotion
          || !gimple_bb (def_stmt)
          || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
          || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info)
              && gimple_code (def_stmt) != GIMPLE_PHI)
          || !vinfo_for_stmt (def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
        {
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
            return false;

          interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))
            return false;
        }

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
         (e.g., if the original value was char, the shift amount is at most 8
         if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
        return false;

      interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
        break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We
           store this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
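
/* For illustration: in a loop like

     unsigned char in[N], out[N];
     for (i = 0; i < N; i++)
       out[i] = (in[i] << 2) | 3;

   the C integer promotions perform the shift and the OR in int, but only
   the low 8 bits survive the demoting store, so the operations fit the
   S1-S5 shape above and can be done in a narrower type, packing more
   elements into each vector.  */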
static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nuses = 0;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  tree type = NULL;
  loop_vec_info loop_vinfo;
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo;
  stmt_vec_info stmt_vinfo;

  stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      lhs = gimple_assign_lhs (stmt);
      nuses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || !gimple_bb (use_stmt)
          || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
          || (!loop && gimple_bb (use_stmt) != BB_VINFO_BB (bb_vinfo)))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
               = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect unsigned cases:

  unsigned type a_t;
  unsigned TYPE u_res_T;
  TYPE a_T, res_T;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 u_res_T = (unsigned TYPE) res_T;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with
    S4 in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
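
/* For illustration: assuming a target with a widening shift (e.g. ARM
   NEON's vshll instruction), a loop like

     short a[N];
     int r[N];
     for (i = 0; i < N; i++)
       r[i] = (int) a[i] << 4;

   matches S2-S3 above and becomes WIDEN_LSHIFT_EXPR <a[i], 4>.  */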
static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt, orig_stmt = NULL;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt = NULL;
  bool over_widen = false;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  orig_stmt = last_stmt;
  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    {
      /* This statement was also detected as over-widening operation (it can't
         be any other pattern, because only over-widening detects shifts).
         LAST_STMT is the final type demotion statement, but its related
         statement is shift.  We analyze the related statement to catch cases:

         orig code:
           type a_t;
           itype res;
           TYPE a_T, res_T;

           S1 a_T = (TYPE) a_t;
           S2 res_T = a_T << CONST;
           S3 res = (itype)res_T;

           (size of type * 2 <= size of itype
            and size of itype * 2 <= size of TYPE)

         code after over-widening pattern detection:

           S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t;
           S2 res_T = a_T << CONST;
           S3 res = (itype)res_T;  <--- LAST_STMT
               --> res = a_it << CONST;

         after widen_shift:

           S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t; - redundant
           S2 res_T = a_T << CONST;
           S3 res = (itype)res_T;
               --> res = a_t w<< CONST;

      i.e., we replace the three statements with res = a_t w<< CONST.  */
      last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
      over_widen = true;
    }

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Handle unsigned case.  Look for
     S4  u_res_T = (unsigned TYPE) res_T;
     Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      tree use_type;

      if (over_widen)
        {
          /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
             We check here that TYPE is the correct type for the operation,
             i.e., it's the type of the original result.  */
          tree orig_type = gimple_expr_type (orig_stmt);
          if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
            return NULL;
        }
      else
        {
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;
              use_stmt = USE_STMT (use_p);
              nuses++;
            }

          if (nuses != 1 || !is_gimple_assign (use_stmt)
              || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
            return NULL;

          use_lhs = gimple_assign_lhs (use_stmt);
          use_type = TREE_TYPE (use_lhs);

          if (!INTEGRAL_TYPE_P (use_type)
              || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
            return NULL;

          type = use_type;
        }
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int,
                                          &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  if (use_stmt)
    last_stmt = use_stmt;
  else
    last_stmt = orig_stmt;

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

  type a_t;
  TYPE b_T, c_T, res_T;

  S0 c_T = ;
  S1 a_t = (type) c_T;
  S2 b_T = ;
  S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
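
/* For illustration: a loop where the shift amount is a variable of a
   narrower type than the shifted value, as in

     int b[N], r[N];
     char a[N];
     for (i = 0; i < N; i++)
       r[i] = b[i] << a[i];

   matches S1-S3 above; a cast of a[i] to the type of b[i] is added to the
   pattern def sequence so that both vector operands of the shift have the
   same type.  */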
static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
                                        tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Detect a signed division by power of two constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is a signed integral type and N is a constant positive
  power of two.

  Similarly handle signed modulo by power of two constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced by:
  S3  y_t = b_t < 0 ? N - 1 : 0;
  S2  x_t = b_t + y_t;
  S1' a_t = x_t >> log2 (N);

    S4 is replaced by (where *_T temporaries have unsigned type):
  S9  y_T = b_t < 0 ? -1U : 0U;
  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
  S7  z_t = (type) z_T;
  S6  w_t = b_t + z_t;
  S5  x_t = w_t & (N - 1);
  S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
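
/* For illustration: for b_t / 8 the replacement sequence above computes

     y_t = b_t < 0 ? 7 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 3;

   which truncates toward zero as C requires: for b_t = -1, y_t = 7,
   x_t = 6 and 6 >> 3 = 0; for b_t = -9, x_t = -2 and -2 >> 3 = -1.  */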
1715 static gimple
1716 vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
1717 tree *type_in, tree *type_out)
1719 gimple last_stmt = VEC_pop (gimple, *stmts);
1720 tree oprnd0, oprnd1, vectype, itype, cond;
1721 gimple pattern_stmt, def_stmt;
1722 enum tree_code rhs_code;
1723 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1724 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1725 optab optab;
1727 if (!is_gimple_assign (last_stmt))
1728 return NULL;
1730 rhs_code = gimple_assign_rhs_code (last_stmt);
1731 switch (rhs_code)
1733 case TRUNC_DIV_EXPR:
1734 case TRUNC_MOD_EXPR:
1735 break;
1736 default:
1737 return NULL;
1740 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1741 return NULL;
1743 oprnd0 = gimple_assign_rhs1 (last_stmt);
1744 oprnd1 = gimple_assign_rhs2 (last_stmt);
1745 itype = TREE_TYPE (oprnd0);
1746 if (TREE_CODE (oprnd0) != SSA_NAME
1747 || TREE_CODE (oprnd1) != INTEGER_CST
1748 || TREE_CODE (itype) != INTEGER_TYPE
1749 || TYPE_UNSIGNED (itype)
1750 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
1751 || !integer_pow2p (oprnd1)
1752 || tree_int_cst_sgn (oprnd1) != 1)
1753 return NULL;
1755 vectype = get_vectype_for_scalar_type (itype);
1756 if (vectype == NULL_TREE)
1757 return NULL;
1759 /* If the target can handle vectorized division or modulo natively,
1760 don't attempt to optimize this. */
1761 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1762 if (optab != NULL)
1764 enum machine_mode vec_mode = TYPE_MODE (vectype);
1765 int icode = (int) optab_handler (optab, vec_mode);
1766 if (icode != CODE_FOR_nothing
1767 || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
1768 return NULL;
1771 /* Pattern detected. */
1772 if (vect_print_dump_info (REPORT_DETAILS))
1773 fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
1775 cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
1776 if (rhs_code == TRUNC_DIV_EXPR)
1778 tree var = vect_recog_temp_ssa_var (itype, NULL);
1779 def_stmt
1780 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1781 fold_build2 (MINUS_EXPR, itype,
1782 oprnd1,
1783 build_int_cst (itype,
1784 1)),
1785 build_int_cst (itype, 0));
1786 new_pattern_def_seq (stmt_vinfo, def_stmt);
1787 var = vect_recog_temp_ssa_var (itype, NULL);
1788 def_stmt
1789 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1790 gimple_assign_lhs (def_stmt));
1791 append_pattern_def_seq (stmt_vinfo, def_stmt);
1793 pattern_stmt
1794 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1795 vect_recog_temp_ssa_var (itype, NULL),
1796 var,
1797 build_int_cst (itype,
1798 tree_log2 (oprnd1)));
1800 else
1802 tree signmask;
1803 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1804 if (compare_tree_int (oprnd1, 2) == 0)
1806 signmask = vect_recog_temp_ssa_var (itype, NULL);
1807 def_stmt
1808 = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
1809 build_int_cst (itype, 1),
1810 build_int_cst (itype, 0));
1811 append_pattern_def_seq (stmt_vinfo, def_stmt);
1813 else
1815 tree utype
1816 = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
1817 tree vecutype = get_vectype_for_scalar_type (utype);
1818 tree shift
1819 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1820 - tree_log2 (oprnd1));
1821 tree var = vect_recog_temp_ssa_var (utype, NULL);
1822 stmt_vec_info def_stmt_vinfo;
1824 def_stmt
1825 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1826 build_int_cst (utype, -1),
1827 build_int_cst (utype, 0));
1828 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1829 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1830 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1831 append_pattern_def_seq (stmt_vinfo, def_stmt);
1832 var = vect_recog_temp_ssa_var (utype, NULL);
1833 def_stmt
1834 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1835 gimple_assign_lhs (def_stmt),
1836 shift);
1837 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1838 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1839 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1840 append_pattern_def_seq (stmt_vinfo, def_stmt);
1841 signmask = vect_recog_temp_ssa_var (itype, NULL);
1842 def_stmt
1843 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1844 NULL_TREE);
1845 append_pattern_def_seq (stmt_vinfo, def_stmt);
1847 def_stmt
1848 = gimple_build_assign_with_ops (PLUS_EXPR,
1849 vect_recog_temp_ssa_var (itype, NULL),
1850 oprnd0, signmask);
1851 append_pattern_def_seq (stmt_vinfo, def_stmt);
1852 def_stmt
1853 = gimple_build_assign_with_ops (BIT_AND_EXPR,
1854 vect_recog_temp_ssa_var (itype, NULL),
1855 gimple_assign_lhs (def_stmt),
1856 fold_build2 (MINUS_EXPR, itype,
1857 oprnd1,
1858 build_int_cst (itype,
1859 1)));
1860 append_pattern_def_seq (stmt_vinfo, def_stmt);
1862 pattern_stmt
1863 = gimple_build_assign_with_ops (MINUS_EXPR,
1864 vect_recog_temp_ssa_var (itype, NULL),
1865 gimple_assign_lhs (def_stmt),
1866 signmask);
1867 }
1869 if (vect_print_dump_info (REPORT_DETAILS))
1870 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1872 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1874 *type_in = vectype;
1875 *type_out = vectype;
1876 return pattern_stmt;
1877 }
1879 /* Function vect_recog_mixed_size_cond_pattern
1881 Try to find the following pattern:
1883 type x_t, y_t;
1884 TYPE a_T, b_T, c_T;
1885 loop:
1886 S1 a_T = x_t CMP y_t ? b_T : c_T;
1888 where type 'TYPE' is an integral type whose size differs from the size
1889 of 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
1890 than 'type', the constants need to fit into an integer type
1891 with the same width as 'type') or results of conversion from 'type'.
1893 Input:
1895 * LAST_STMT: A stmt from which the pattern search begins.
1897 Output:
1899 * TYPE_IN: The type of the input arguments to the pattern.
1901 * TYPE_OUT: The type of the output of this pattern.
1903 * Return value: A new stmt that will be used to replace the pattern.
1904 Additionally a def_stmt is added:
1906 a_it = x_t CMP y_t ? b_it : c_it;
1907 a_T = (TYPE) a_it; */
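/* For instance (an illustrative example, not from this file), given
     short x_t;  int a_T;
     a_T = x_t < 0 ? -1 : 1;
   the added def_stmt is the 16-bit  a_it = x_t < 0 ? -1 : 1  and the
   returned stmt is  a_T = (int) a_it,  so the COND_EXPR can be carried
   out in the same vector element size as the comparison.  */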
1909 static gimple
1910 vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1911 tree *type_out)
1912 {
1913 gimple last_stmt = VEC_index (gimple, *stmts, 0);
1914 tree cond_expr, then_clause, else_clause;
1915 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1916 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
1917 enum machine_mode cmpmode;
1918 gimple pattern_stmt, def_stmt;
1919 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1920 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1921 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
1922 gimple def_stmt0 = NULL, def_stmt1 = NULL;
1923 bool promotion;
1924 tree comp_scalar_type;
1926 if (!is_gimple_assign (last_stmt)
1927 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1928 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1929 return NULL;
1931 cond_expr = gimple_assign_rhs1 (last_stmt);
1932 then_clause = gimple_assign_rhs2 (last_stmt);
1933 else_clause = gimple_assign_rhs3 (last_stmt);
1935 if (!COMPARISON_CLASS_P (cond_expr))
1936 return NULL;
1938 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
1939 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
1940 if (comp_vectype == NULL_TREE)
1941 return NULL;
1943 type = gimple_expr_type (last_stmt);
1944 if (types_compatible_p (type, comp_scalar_type)
1945 || ((TREE_CODE (then_clause) != INTEGER_CST
1946 || TREE_CODE (else_clause) != INTEGER_CST)
1947 && !INTEGRAL_TYPE_P (comp_scalar_type))
1948 || !INTEGRAL_TYPE_P (type))
1949 return NULL;
1951 if ((TREE_CODE (then_clause) != INTEGER_CST
1952 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
1953 &def_stmt0, &promotion))
1954 || (TREE_CODE (else_clause) != INTEGER_CST
1955 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
1956 &def_stmt1, &promotion)))
1957 return NULL;
1959 if (orig_type0 && orig_type1
1960 && !types_compatible_p (orig_type0, orig_type1))
1961 return NULL;
1963 if (orig_type0)
1964 {
1965 if (!types_compatible_p (orig_type0, comp_scalar_type))
1966 return NULL;
1967 then_clause = gimple_assign_rhs1 (def_stmt0);
1968 itype = orig_type0;
1969 }
1971 if (orig_type1)
1972 {
1973 if (!types_compatible_p (orig_type1, comp_scalar_type))
1974 return NULL;
1975 else_clause = gimple_assign_rhs1 (def_stmt1);
1976 itype = orig_type1;
1977 }
1979 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
1981 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1982 return NULL;
1984 vectype = get_vectype_for_scalar_type (type);
1985 if (vectype == NULL_TREE)
1986 return NULL;
1988 if (expand_vec_cond_expr_p (vectype, comp_vectype))
1989 return NULL;
1991 if (itype == NULL_TREE)
1992 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1993 TYPE_UNSIGNED (type));
1995 if (itype == NULL_TREE
1996 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1997 return NULL;
1999 vecitype = get_vectype_for_scalar_type (itype);
2000 if (vecitype == NULL_TREE)
2001 return NULL;
2003 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2004 return NULL;
2006 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2007 {
2008 if ((TREE_CODE (then_clause) == INTEGER_CST
2009 && !int_fits_type_p (then_clause, itype))
2010 || (TREE_CODE (else_clause) == INTEGER_CST
2011 && !int_fits_type_p (else_clause, itype)))
2012 return NULL;
2013 }
2015 def_stmt
2016 = gimple_build_assign_with_ops3 (COND_EXPR,
2017 vect_recog_temp_ssa_var (itype, NULL),
2018 unshare_expr (cond_expr),
2019 fold_convert (itype, then_clause),
2020 fold_convert (itype, else_clause));
2021 pattern_stmt
2022 = gimple_build_assign_with_ops (NOP_EXPR,
2023 vect_recog_temp_ssa_var (type, NULL),
2024 gimple_assign_lhs (def_stmt), NULL_TREE);
2026 new_pattern_def_seq (stmt_vinfo, def_stmt);
2027 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2028 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2029 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2030 *type_in = vecitype;
2031 *type_out = vectype;
2033 if (vect_print_dump_info (REPORT_DETAILS))
2034 fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: ");
2036 return pattern_stmt;
2037 }
2040 /* Helper function of vect_recog_bool_pattern.  Called recursively; return
2041 true if the bool computation of VAR can be optimized by that pattern.  */
2043 static bool
2044 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2045 {
2046 gimple def_stmt;
2047 enum vect_def_type dt;
2048 tree def, rhs1;
2049 enum tree_code rhs_code;
2051 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2052 &dt))
2053 return false;
2055 if (dt != vect_internal_def)
2056 return false;
2058 if (!is_gimple_assign (def_stmt))
2059 return false;
2061 if (!has_single_use (def))
2062 return false;
2064 rhs1 = gimple_assign_rhs1 (def_stmt);
2065 rhs_code = gimple_assign_rhs_code (def_stmt);
2066 switch (rhs_code)
2067 {
2068 case SSA_NAME:
2069 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2071 CASE_CONVERT:
2072 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2073 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2074 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2075 return false;
2076 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2078 case BIT_NOT_EXPR:
2079 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2081 case BIT_AND_EXPR:
2082 case BIT_IOR_EXPR:
2083 case BIT_XOR_EXPR:
2084 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2085 return false;
2086 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2087 bb_vinfo);
2089 default:
2090 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2091 {
2092 tree vecitype, comp_vectype;
2094 /* If the comparison can throw, then is_gimple_condexpr will be
2095 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2096 if (stmt_could_throw_p (def_stmt))
2097 return false;
2099 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2100 if (comp_vectype == NULL_TREE)
2101 return false;
2103 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2104 {
2105 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2106 tree itype
2107 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2108 vecitype = get_vectype_for_scalar_type (itype);
2109 if (vecitype == NULL_TREE)
2110 return false;
2111 }
2112 else
2113 vecitype = comp_vectype;
2114 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2115 }
2116 return false;
2117 }
2118 }
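/* E.g. (an illustrative case, not from this file) a chain such as
     bool b_b = (x < y) & (z != w);
   passes this check when the comparison operand types have vector types
   and the corresponding VEC_COND_EXPRs are supported by the target;
   a comparison that may throw, or a def with more than one use, makes
   the whole chain fail.  */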
2121 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to the stmt
2122 that defines VAR (SSA_NAME_DEF_STMT of VAR) by moving its COND_EXPR from
2123 RELATED_STMT to PATTERN_DEF_SEQ and recording the cast as RELATED_STMT.  */
2125 static tree
2126 adjust_bool_pattern_cast (tree type, tree var)
2127 {
2128 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2129 gimple cast_stmt, pattern_stmt;
2131 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2132 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2133 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2134 cast_stmt
2135 = gimple_build_assign_with_ops (NOP_EXPR,
2136 vect_recog_temp_ssa_var (type, NULL),
2137 gimple_assign_lhs (pattern_stmt),
2138 NULL_TREE);
2139 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2140 return gimple_assign_lhs (cast_stmt);
2141 }
2144 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations
2145 recursively.  VAR is an SSA_NAME that should be transformed from bool
2146 to a wider integer type.  OUT_TYPE is the desired final integer type of
2147 the whole pattern.  TRUEVAL should be NULL unless optimizing a
2148 BIT_AND_EXPR into a COND_EXPR with the integer value of one of the operands
2149 in the then_clause.  STMTS is where statements with added pattern stmts
2150 should be pushed to.  */
2152 static tree
2153 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2154 VEC (gimple, heap) **stmts)
2155 {
2156 gimple stmt = SSA_NAME_DEF_STMT (var);
2157 enum tree_code rhs_code, def_rhs_code;
2158 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2159 location_t loc;
2160 gimple pattern_stmt, def_stmt;
2162 rhs1 = gimple_assign_rhs1 (stmt);
2163 rhs2 = gimple_assign_rhs2 (stmt);
2164 rhs_code = gimple_assign_rhs_code (stmt);
2165 loc = gimple_location (stmt);
2166 switch (rhs_code)
2167 {
2168 case SSA_NAME:
2169 CASE_CONVERT:
2170 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2171 itype = TREE_TYPE (irhs1);
2172 pattern_stmt
2173 = gimple_build_assign_with_ops (SSA_NAME,
2174 vect_recog_temp_ssa_var (itype, NULL),
2175 irhs1, NULL_TREE);
2176 break;
2178 case BIT_NOT_EXPR:
2179 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2180 itype = TREE_TYPE (irhs1);
2181 pattern_stmt
2182 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2183 vect_recog_temp_ssa_var (itype, NULL),
2184 irhs1, build_int_cst (itype, 1));
2185 break;
2187 case BIT_AND_EXPR:
2188 /* Try to optimize x = y & (a < b ? 1 : 0); into
2189 x = (a < b ? y : 0);
2191 E.g. for:
2192 bool a_b, b_b, c_b;
2193 TYPE d_T;
2195 S1 a_b = x1 CMP1 y1;
2196 S2 b_b = x2 CMP2 y2;
2197 S3 c_b = a_b & b_b;
2198 S4 d_T = (TYPE) c_b;
2200 we would normally emit:
2202 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2203 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2204 S3' c_T = a_T & b_T;
2205 S4' d_T = c_T;
2207 but we can save one stmt by using the
2208 result of one of the COND_EXPRs in the other COND_EXPR and leave the
2209 BIT_AND_EXPR stmt out:
2211 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2212 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2213 S4' d_T = c_T;
2215 At least when VEC_COND_EXPR is implemented using masks,
2216 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2217 computes the comparison mask and ANDs it, in one case with an
2218 all-ones vector, in the other case with a vector register.
2219 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2220 often more expensive.  */
2221 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2222 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2223 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2224 {
2225 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2226 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2227 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2228 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2229 {
2230 gimple tstmt;
2231 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2232 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2233 tstmt = VEC_pop (gimple, *stmts);
2234 gcc_assert (tstmt == def_stmt);
2235 VEC_quick_push (gimple, *stmts, stmt);
2236 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2237 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2238 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2239 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2240 return irhs2;
2241 }
2242 else
2243 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2244 goto and_ior_xor;
2245 }
2246 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2247 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2248 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2249 {
2250 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2251 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2252 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2253 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2254 {
2255 gimple tstmt;
2256 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2257 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2258 tstmt = VEC_pop (gimple, *stmts);
2259 gcc_assert (tstmt == def_stmt);
2260 VEC_quick_push (gimple, *stmts, stmt);
2261 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2262 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2263 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2264 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2265 return irhs1;
2266 }
2267 else
2268 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2269 goto and_ior_xor;
2270 }
2271 /* FALLTHRU */
2272 case BIT_IOR_EXPR:
2273 case BIT_XOR_EXPR:
2274 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2275 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2276 and_ior_xor:
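/* If the two operands ended up with different precisions, cast the one
   whose precision is further away from OUT_TYPE's precision to the other
   operand's type; when both are equally far away, cast both to OUT_TYPE
   itself.  This mirrors the absu_hwi comparisons below.  */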
2277 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2278 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2279 {
2280 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2281 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2282 int out_prec = TYPE_PRECISION (out_type);
2283 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2284 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2285 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2286 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2287 else
2288 {
2289 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2290 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2291 }
2292 }
2293 itype = TREE_TYPE (irhs1);
2294 pattern_stmt
2295 = gimple_build_assign_with_ops (rhs_code,
2296 vect_recog_temp_ssa_var (itype, NULL),
2297 irhs1, irhs2);
2298 break;
2300 default:
2301 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2302 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2303 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2304 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2305 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2306 {
2307 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2308 itype
2309 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2310 }
2311 else
2312 itype = TREE_TYPE (rhs1);
2313 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2314 if (trueval == NULL_TREE)
2315 trueval = build_int_cst (itype, 1);
2316 else
2317 gcc_checking_assert (useless_type_conversion_p (itype,
2318 TREE_TYPE (trueval)));
2319 pattern_stmt
2320 = gimple_build_assign_with_ops3 (COND_EXPR,
2321 vect_recog_temp_ssa_var (itype, NULL),
2322 cond_expr, trueval,
2323 build_int_cst (itype, 0));
2324 break;
2325 }
2327 VEC_safe_push (gimple, heap, *stmts, stmt);
2328 gimple_set_location (pattern_stmt, loc);
2329 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2330 return gimple_assign_lhs (pattern_stmt);
2331 }
2334 /* Function vect_recog_bool_pattern
2336 Try to find a pattern like the following:
2338 bool a_b, b_b, c_b, d_b, e_b;
2339 TYPE f_T;
2340 loop:
2341 S1 a_b = x1 CMP1 y1;
2342 S2 b_b = x2 CMP2 y2;
2343 S3 c_b = a_b & b_b;
2344 S4 d_b = x3 CMP3 y3;
2345 S5 e_b = c_b | d_b;
2346 S6 f_T = (TYPE) e_b;
2348 where type 'TYPE' is an integral type.
2350 Input:
2352 * LAST_STMT: The stmt at the end of the pattern, from which the
2353 search begins, i.e. the cast of a bool to
2354 an integer type.
2356 Output:
2358 * TYPE_IN: The type of the input arguments to the pattern.
2360 * TYPE_OUT: The type of the output of this pattern.
2362 * Return value: A new stmt that will be used to replace the pattern.
2364 Assuming the size of TYPE is the same as the size of all comparisons
2365 (otherwise some casts would be added where needed), for the above
2366 sequence we create the following related pattern stmts:
2367 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2368 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2369 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2370 S5' e_T = c_T | d_T;
2371 S6' f_T = e_T;
2373 Instead of the above S3' we could emit:
2374 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2375 S3' c_T = a_T | b_T;
2376 but the above is more efficient. */
2378 static gimple
2379 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2380 tree *type_out)
2381 {
2382 gimple last_stmt = VEC_pop (gimple, *stmts);
2383 enum tree_code rhs_code;
2384 tree var, lhs, rhs, vectype;
2385 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2386 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2387 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2388 gimple pattern_stmt;
2390 if (!is_gimple_assign (last_stmt))
2391 return NULL;
2393 var = gimple_assign_rhs1 (last_stmt);
2394 lhs = gimple_assign_lhs (last_stmt);
2396 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2397 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2398 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2399 return NULL;
2401 rhs_code = gimple_assign_rhs_code (last_stmt);
2402 if (CONVERT_EXPR_CODE_P (rhs_code))
2403 {
2404 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2405 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2406 return NULL;
2407 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2408 if (vectype == NULL_TREE)
2409 return NULL;
2411 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2412 return NULL;
2414 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2415 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2416 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2417 pattern_stmt
2418 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2419 else
2420 pattern_stmt
2421 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2422 *type_out = vectype;
2423 *type_in = vectype;
2424 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2425 if (vect_print_dump_info (REPORT_DETAILS))
2426 fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
2428 return pattern_stmt;
2429 }
2430 else if (rhs_code == SSA_NAME
2431 && STMT_VINFO_DATA_REF (stmt_vinfo))
2432 {
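/* This arm handles a bool value that is stored to memory, e.g.
   (an illustrative case)  res[i] = x < y;  -- the bool computation is
   rewritten into COND_EXPRs producing the vector element type, and the
   data reference is transferred to the new pattern stmt.  */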
2433 stmt_vec_info pattern_stmt_info;
2434 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2435 gcc_assert (vectype != NULL_TREE);
2436 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2437 return NULL;
2438 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2439 return NULL;
2441 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2442 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2443 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2444 {
2445 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2446 gimple cast_stmt
2447 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2448 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2449 rhs = rhs2;
2450 }
2451 pattern_stmt
2452 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2453 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2454 bb_vinfo);
2455 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2456 STMT_VINFO_DATA_REF (pattern_stmt_info)
2457 = STMT_VINFO_DATA_REF (stmt_vinfo);
2458 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2459 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2460 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2461 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2462 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2463 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2464 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2465 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2466 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2467 *type_out = vectype;
2468 *type_in = vectype;
2469 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2470 if (vect_print_dump_info (REPORT_DETAILS))
2471 fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
2472 return pattern_stmt;
2473 }
2474 else
2475 return NULL;
2476 }
2479 /* Mark statements that are involved in a pattern. */
2481 static inline void
2482 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2483 tree pattern_vectype)
2484 {
2485 stmt_vec_info pattern_stmt_info, def_stmt_info;
2486 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2487 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2488 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2489 gimple def_stmt;
2491 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2492 if (pattern_stmt_info == NULL)
2493 {
2494 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2495 bb_vinfo);
2496 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2497 }
2498 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2500 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2501 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2502 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2503 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2504 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2505 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2506 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2507 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2508 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2509 {
2510 gimple_stmt_iterator si;
2511 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2512 !gsi_end_p (si); gsi_next (&si))
2513 {
2514 def_stmt = gsi_stmt (si);
2515 def_stmt_info = vinfo_for_stmt (def_stmt);
2516 if (def_stmt_info == NULL)
2517 {
2518 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2519 bb_vinfo);
2520 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2521 }
2522 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2523 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2524 STMT_VINFO_DEF_TYPE (def_stmt_info)
2525 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2526 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2527 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2528 }
2529 }
2530 }
2532 /* Function vect_pattern_recog_1
2534 Input:
2535 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2536 computation pattern.
2537 STMT: A stmt from which the pattern search should start.
2539 If PATTERN_RECOG_FUNC successfully detects the pattern, it creates an
2540 expression that computes the same functionality and can be used to
2541 replace the sequence of stmts that are involved in the pattern.
2543 Output:
2544 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2545 supported in vector form by the target.  We use 'TYPE_IN' to obtain the
2546 relevant vector type.  If 'TYPE_IN' is already a vector type, then this
2547 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2548 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
2549 matches the available target pattern.
2551 This function also does some bookkeeping, as explained in the documentation
2552 for vect_pattern_recog.  */
2554 static void
2555 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2556 gimple_stmt_iterator si,
2557 VEC (gimple, heap) **stmts_to_replace)
2558 {
2559 gimple stmt = gsi_stmt (si), pattern_stmt;
2560 stmt_vec_info stmt_info;
2561 loop_vec_info loop_vinfo;
2562 tree pattern_vectype;
2563 tree type_in, type_out;
2564 enum tree_code code;
2565 int i;
2566 gimple next;
2568 VEC_truncate (gimple, *stmts_to_replace, 0);
2569 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2570 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2571 if (!pattern_stmt)
2572 return;
2574 stmt = VEC_last (gimple, *stmts_to_replace);
2575 stmt_info = vinfo_for_stmt (stmt);
2576 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2578 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2579 {
2580 /* No need to check target support (already checked by the pattern
2581 recognition function). */
2582 pattern_vectype = type_out ? type_out : type_in;
2583 }
2584 else
2585 {
2586 enum machine_mode vec_mode;
2587 enum insn_code icode;
2588 optab optab;
2590 /* Check target support */
2591 type_in = get_vectype_for_scalar_type (type_in);
2592 if (!type_in)
2593 return;
2594 if (type_out)
2595 type_out = get_vectype_for_scalar_type (type_out);
2596 else
2597 type_out = type_in;
2598 if (!type_out)
2599 return;
2600 pattern_vectype = type_out;
2602 if (is_gimple_assign (pattern_stmt))
2603 code = gimple_assign_rhs_code (pattern_stmt);
2604 else
2605 {
2606 gcc_assert (is_gimple_call (pattern_stmt));
2607 code = CALL_EXPR;
2608 }
2610 optab = optab_for_tree_code (code, type_in, optab_default);
2611 vec_mode = TYPE_MODE (type_in);
2612 if (!optab
2613 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2614 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2615 return;
2616 }
2618 /* Found a vectorizable pattern. */
2619 if (vect_print_dump_info (REPORT_DETAILS))
2620 {
2621 fprintf (vect_dump, "pattern recognized: ");
2622 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2623 }
2625 /* Mark the stmts that are involved in the pattern. */
2626 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2628 /* Patterns cannot be vectorized using SLP, because they change the order of
2629 computation. */
2630 if (loop_vinfo)
2631 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2632 if (next == stmt)
2633 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2635 /* It is possible that additional pattern stmts are created and inserted in
2636 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2637 relevant statements. */
2638 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2639 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2640 i++)
2641 {
2642 stmt_info = vinfo_for_stmt (stmt);
2643 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2644 if (vect_print_dump_info (REPORT_DETAILS))
2645 {
2646 fprintf (vect_dump, "additional pattern stmt: ");
2647 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2648 }
2650 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2651 }
2652 }
2655 /* Function vect_pattern_recog
2657 Input:
2658 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
2659 computation idioms.
2661 Output - for each computation idiom that is detected we create a new stmt
2662 that provides the same functionality and that can be vectorized. We
2663 also record some information in the struct_stmt_info of the relevant
2664 stmts, as explained below:
2666 At the entry to this function we have the following stmts, with the
2667 following initial value in the STMT_VINFO fields:
2669 stmt in_pattern_p related_stmt vec_stmt
2670 S1: a_i = .... - - -
2671 S2: a_2 = ..use(a_i).. - - -
2672 S3: a_1 = ..use(a_2).. - - -
2673 S4: a_0 = ..use(a_1).. - - -
2674 S5: ... = ..use(a_0).. - - -
2676 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2677 represented by a single stmt. We then:
2678 - create a new stmt S6 equivalent to the pattern (the stmt is not
2679 inserted into the code)
2680 - fill in the STMT_VINFO fields as follows:
2682 in_pattern_p related_stmt vec_stmt
2683 S1: a_i = .... - - -
2684 S2: a_2 = ..use(a_i).. - - -
2685 S3: a_1 = ..use(a_2).. - - -
2686 S4: a_0 = ..use(a_1).. true S6 -
2687 '---> S6: a_new = .... - S4 -
2688 S5: ... = ..use(a_0).. - - -
2690 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2691 to each other through the RELATED_STMT field).
2693 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2694 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2695 remain irrelevant unless used by stmts other than S4.
2697 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2698 (because they are marked as irrelevant). It will vectorize S6, and record
2699 a pointer to the new vector stmt VS6 from S6 (as usual).
2700 S4 will be skipped, and S5 will be vectorized as usual:
2702 in_pattern_p related_stmt vec_stmt
2703 S1: a_i = .... - - -
2704 S2: a_2 = ..use(a_i).. - - -
2705 S3: a_1 = ..use(a_2).. - - -
2706 > VS6: va_new = .... - - -
2707 S4: a_0 = ..use(a_1).. true S6 VS6
2708 '---> S6: a_new = .... - S4 VS6
2709 > VS5: ... = ..vuse(va_new).. - - -
2710 S5: ... = ..use(a_0).. - - -
2712 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2713 elsewhere), and we'll end up with:
2715 VS6: va_new = ....
2716 VS5: ... = ..vuse(va_new)..
2718 In case of more than one pattern statement, e.g., widen-mult with
2719 intermediate type:
2721 S1 a_t = ;
2722 S2 a_T = (TYPE) a_t;
2723 '--> S3: a_it = (interm_type) a_t;
2724 S4 prod_T = a_T * CONST;
2725 '--> S5: prod_T' = a_it w* CONST;
2727 there may be other users of a_T outside the pattern. In that case S2 will
2728 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2729 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2730 be recorded in S3. */
2732 void
2733 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2734 {
2735 struct loop *loop;
2736 basic_block *bbs, bb;
2737 unsigned int nbbs;
2738 gimple_stmt_iterator si;
2739 unsigned int i, j;
2740 vect_recog_func_ptr vect_recog_func;
2741 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2742 gimple stmt;
2744 if (vect_print_dump_info (REPORT_DETAILS))
2745 fprintf (vect_dump, "=== vect_pattern_recog ===");
2747 if (loop_vinfo)
2748 {
2749 loop = LOOP_VINFO_LOOP (loop_vinfo);
2750 bbs = LOOP_VINFO_BBS (loop_vinfo);
2751 nbbs = loop->num_nodes;
2752 }
2753 else
2754 {
2755 bb = BB_VINFO_BB (bb_vinfo);
2756 nbbs = 1;
2757 bbs = XNEW (basic_block);
2758 bbs[0] = bb;
2759 }
2761 /* Scan through the loop stmts, applying the pattern recognition
2762 functions starting at each stmt visited: */
2763 for (i = 0; i < nbbs; i++)
2764 {
2765 basic_block bb = bbs[i];
2766 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2767 {
2768 if (bb_vinfo && (stmt = gsi_stmt (si))
2769 && vinfo_for_stmt (stmt)
2770 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
2771 continue;
2773 /* Scan over all generic vect_recog_xxx_pattern functions. */
2774 for (j = 0; j < NUM_PATTERNS; j++)
2775 {
2776 vect_recog_func = vect_vect_recog_func_ptrs[j];
2777 vect_pattern_recog_1 (vect_recog_func, si,
2778 &stmts_to_replace);
2779 }
2780 }
2781 }
2783 VEC_free (gimple, heap, stmts_to_replace);
2784 if (bb_vinfo)
2785 free (bbs);
2786 }