gcc/tree-vect-patterns.c
/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"
#include "diagnostic-core.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                              tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
                                               tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
        vect_recog_widen_mult_pattern,
        vect_recog_widen_sum_pattern,
        vect_recog_dot_prod_pattern,
        vect_recog_pow_pattern,
        vect_recog_over_widening_pattern,
        vect_recog_widen_shift_pattern,
        vect_recog_vector_vector_shift_pattern,
        vect_recog_sdivmod_pow2_pattern,
        vect_recog_mixed_size_cond_pattern,
        vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
                   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
                           &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
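
/* For illustration only, and not part of the vectorizer itself: a scalar
   source loop of the shape this function recognizes (names are made up for
   the example; the block is disabled with #if 0 so it is never compiled).  */
#if 0
int
dot_prod_example (short *x, short *y, int n)
{
  int i, sum = 0;
  for (i = 0; i < n; i++)
    sum += (int) x[i] * (int) y[i];  /* S3-S7: widen, multiply, accumulate;
                                        becomes DOT_PROD_EXPR <x, y, sum>.  */
  return sum;
}
#endif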
static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
          && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
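
/* Illustrative sketch (disabled, not part of GCC): with HALF_TYPE == char
   and TYPE == int (4 times bigger), a constant such as 300 does not fit
   char but does fit the intermediate short type, so the promotion is
   rewritten to go through short and the operation is widened from short.  */
#if 0
void
widen_by_const_example (signed char *a, int *out, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = a[i] * 300;  /* a_it = (short) a[i];  out = a_it w* 300;  */
}
#endif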
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               VEC (gimple, heap) **stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;
  loop_vec_info loop_vinfo;
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo;
  stmt_vec_info stmt_vinfo;

  stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
              != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
      || !gimple_bb (def_stmt)
      || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
      || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_vinfo)
          && gimple_code (def_stmt) != GIMPLE_PHI)
      || !vinfo_for_stmt (def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
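
/* For illustration only (disabled): a loop matching the first form above,
   with type == short and TYPE == int.  */
#if 0
void
widen_mult_example (short *a, short *b, int *c, int n)
{
  int i;
  for (i = 0; i < n; i++)
    c[i] = (int) a[i] * (int) b[i];  /* S3-S5: becomes c = a w* b.  */
}
#endif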
static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      gimple use_stmt = NULL;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
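
/* For illustration only (disabled): calls of the shape this function
   rewrites; assumes <math.h> for the pow prototype.  */
#if 0
#include <math.h>

double
pow_example (double x, double y)
{
  return pow (x, 2.0) + pow (y, 0.5);  /* becomes x * x + sqrt (y).  */
}
#endif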
static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
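
/* For illustration only (disabled): a widening summation with
   type == short and TYPE == int.  */
#if 0
int
widen_sum_example (short *p, int n)
{
  int i, sum = 0;
  for (i = 0; i < n; i++)
    sum += p[i];  /* S2-S3: becomes WIDEN_SUM <p[i], sum>.  */
  return sum;
}
#endif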
static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  bb_vec_info bb_info = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = NULL;
  bool promotion;

  if (loop_info)
    loop = LOOP_VINFO_LOOP (loop_info);

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
                              &promotion)
          || !promotion
          || !gimple_bb (def_stmt)
          || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
          || (!loop && gimple_bb (def_stmt) != BB_VINFO_BB (bb_info)
              && gimple_code (def_stmt) != GIMPLE_PHI)
          || !vinfo_for_stmt (def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
        {
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
            return false;

          interm_type = build_nonstandard_integer_type (
                          TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))
            return false;
        }

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
         (e.g., if the original value was char, the shift amount is at most 8
         if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
        return false;

      interm_type = build_nonstandard_integer_type (
                      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
        break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      interm_type = build_nonstandard_integer_type (
                      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
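
/* For illustration only (disabled): C's integer promotion rules over-widen
   these char operations to int, and the result is demoted straight back to
   char, so the whole sequence can be done on the narrower type.  */
#if 0
void
over_widening_example (unsigned char *a, unsigned char *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    b[i] = (a[i] >> 1) | 0x40;  /* ops promoted to int (S2-S4), result
                                   demoted back to char (S5).  */
}
#endif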
static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nuses = 0;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  tree type = NULL;
  loop_vec_info loop_vinfo;
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo;
  stmt_vec_info stmt_vinfo;

  stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      lhs = gimple_assign_lhs (stmt);
      nuses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || !gimple_bb (use_stmt)
          || (loop && !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
          || (!loop && gimple_bb (use_stmt) != BB_VINFO_BB (bb_vinfo)))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect unsigned cases:

  unsigned type a_t;
  unsigned TYPE u_res_T;
  TYPE a_T, res_T;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 u_res_T = (unsigned TYPE) res_T;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with
    S4 in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
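
/* For illustration only (disabled): type == short, TYPE == int.  */
#if 0
void
widen_shift_example (short *a, int *res, int n)
{
  int i;
  for (i = 0; i < n; i++)
    res[i] = (int) a[i] << 8;  /* S2-S3: becomes res = a w<< 8.  */
}
#endif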
static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt, orig_stmt = NULL;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt = NULL;
  bool over_widen = false;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  orig_stmt = last_stmt;
  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    {
      /* This statement was also detected as over-widening operation (it can't
         be any other pattern, because only over-widening detects shifts).
         LAST_STMT is the final type demotion statement, but its related
         statement is shift.  We analyze the related statement to catch cases:

         orig code:
          type a_t;
          itype res;
          TYPE a_T, res_T;

          S1 a_T = (TYPE) a_t;
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;

          (size of type * 2 <= size of itype
           and size of itype * 2 <= size of TYPE)

         code after over-widening pattern detection:

          S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t;
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;  <--- LAST_STMT
               --> res = a_it << CONST;

         after widen_shift:

          S1 a_T = (TYPE) a_t;
               --> a_it = (itype) a_t; - redundant
          S2 res_T = a_T << CONST;
          S3 res = (itype)res_T;
               --> res = a_t w<< CONST;

      i.e., we replace the three statements with res = a_t w<< CONST.  */
      last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
      over_widen = true;
    }

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Handle unsigned case.  Look for
     S4  u_res_T = (unsigned TYPE) res_T;
     Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      tree use_type;

      if (over_widen)
        {
          /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
             We check here that TYPE is the correct type for the operation,
             i.e., it's the type of the original result.  */
          tree orig_type = gimple_expr_type (orig_stmt);
          if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
            return NULL;
        }
      else
        {
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;
              use_stmt = USE_STMT (use_p);
              nuses++;
            }

          if (nuses != 1 || !is_gimple_assign (use_stmt)
              || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
            return NULL;

          use_lhs = gimple_assign_lhs (use_stmt);
          use_type = TREE_TYPE (use_lhs);

          if (!INTEGRAL_TYPE_P (use_type)
              || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
            return NULL;

          type = use_type;
        }
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int,
                                          &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  if (use_stmt)
    last_stmt = use_stmt;
  else
    last_stmt = orig_stmt;

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

  type a_t;
  TYPE b_T, c_T, res_T;

  S0 c_T = ;
  S1 a_t = (type) c_T;
  S2 b_T = ;
  S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
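
/* For illustration only (disabled): the shift amount keeps its own,
   narrower type at the GIMPLE level, so the shift has mixed operand
   sizes (TYPE == long long, type == int) and matches the first form.  */
#if 0
void
vector_vector_shift_example (long long *b, int *amount, int n)
{
  int i;
  for (i = 0; i < n; i++)
    b[i] = b[i] << amount[i];  /* S3: rewritten to shift by a cast amount.  */
}
#endif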
static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
                                        tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))
        def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
/* Detect a signed division by power of two constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is a signed integral type and N is a constant positive
  power of two.

  Similarly handle signed modulo by power of two constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  S1 is replaced by:
  S3  y_t = b_t < 0 ? N - 1 : 0;
  S2  x_t = b_t + y_t;
  S1' a_t = x_t >> log2 (N);

    S4 is replaced by (where *_T temporaries have unsigned type):
  S9  y_T = b_t < 0 ? -1U : 0U;
  S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
  S7  z_t = (type) z_T;
  S6  w_t = b_t + z_t;
  S5  x_t = w_t & (N - 1);
  S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
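
/* For illustration only (disabled): the S3/S2/S1' expansion for N == 8,
   showing that it matches C's truncating division.  E.g. b_t == -13 gives
   (-13 + 7) >> 3 == -6 >> 3 == -1, which equals -13 / 8.  */
#if 0
int
sdiv_pow2_example (int b_t)
{
  int y_t = b_t < 0 ? 8 - 1 : 0;  /* S3 */
  int x_t = b_t + y_t;            /* S2 */
  return x_t >> 3;                /* S1': log2 (8) == 3 */
}
#endif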
static gimple
vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
                                 tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  optab optab;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_UNSIGNED (itype)
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
      || !integer_pow2p (oprnd1)
      || tree_int_cst_sgn (oprnd1) != 1)
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != NULL)
    {
      enum machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing
          || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
        return NULL;
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");

  cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
  if (rhs_code == TRUNC_DIV_EXPR)
    {
      tree var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
                                         fold_build2 (MINUS_EXPR, itype,
                                                      oprnd1,
                                                      build_int_cst (itype,
                                                                     1)),
                                         build_int_cst (itype, 0));
      new_pattern_def_seq (stmt_vinfo, def_stmt);
      var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
                                        gimple_assign_lhs (def_stmt));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
        = gimple_build_assign_with_ops (RSHIFT_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        var,
                                        build_int_cst (itype,
                                                       tree_log2 (oprnd1)));
    }
  else
    {
      tree signmask;
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
      if (compare_tree_int (oprnd1, 2) == 0)
        {
          signmask = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
                                             build_int_cst (itype, 1),
                                             build_int_cst (itype, 0));
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
      else
        {
          tree utype
            = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
          tree vecutype = get_vectype_for_scalar_type (utype);
          tree shift
            = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
                                    - tree_log2 (oprnd1));
          tree var = vect_recog_temp_ssa_var (utype, NULL);
          stmt_vec_info def_stmt_vinfo;

          def_stmt
            = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
                                             build_int_cst (utype, -1),
                                             build_int_cst (utype, 0));
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          var = vect_recog_temp_ssa_var (utype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
                                            gimple_assign_lhs (def_stmt),
                                            shift);
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          signmask = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
                                            NULL_TREE);
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
      def_stmt
        = gimple_build_assign_with_ops (PLUS_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        oprnd0, signmask);
      append_pattern_def_seq (stmt_vinfo, def_stmt);
      def_stmt
        = gimple_build_assign_with_ops (BIT_AND_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        gimple_assign_lhs (def_stmt),
                                        fold_build2 (MINUS_EXPR, itype,
                                                     oprnd1,
                                                     build_int_cst (itype,
                                                                    1)));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
        = gimple_build_assign_with_ops (MINUS_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        gimple_assign_lhs (def_stmt),
                                        signmask);
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}
1864 /* Function vect_recog_mixed_size_cond_pattern
1866 Try to find the following pattern:
1868 type x_t, y_t;
1869 TYPE a_T, b_T, c_T;
1870 loop:
1871 S1 a_T = x_t CMP y_t ? b_T : c_T;
1873 where type 'TYPE' is an integral type which has a different size
1874 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
1875 than 'type', the constants need to fit into an integer type
1876 with the same width as 'type') or results of conversion from 'type'.
1878 Input:
1880 * LAST_STMT: A stmt from which the pattern search begins.
1882 Output:
1884 * TYPE_IN: The type of the input arguments to the pattern.
1886 * TYPE_OUT: The type of the output of this pattern.
1888 * Return value: A new stmt that will be used to replace the pattern.
1889 Additionally a def_stmt is added.
1891 a_it = x_t CMP y_t ? b_it : c_it;
1892 a_T = (TYPE) a_it; */
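/* For instance (types illustrative), given
     short x_t, y_t;
     int a_T;
     a_T = x_t < y_t ? 42 : 7;
   the pattern produces a short-width conditional followed by a widening
   conversion:
     a_it = x_t < y_t ? 42 : 7;
     a_T = (int) a_it;
   so the VEC_COND_EXPR can be carried out in the vector type of the
   comparison.  */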
1894 static gimple
1895 vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1896 tree *type_out)
1898 gimple last_stmt = VEC_index (gimple, *stmts, 0);
1899 tree cond_expr, then_clause, else_clause;
1900 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
1901 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
1902 enum machine_mode cmpmode;
1903 gimple pattern_stmt, def_stmt;
1904 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1905 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1906 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
1907 gimple def_stmt0 = NULL, def_stmt1 = NULL;
1908 bool promotion;
1909 tree comp_scalar_type;
1911 if (!is_gimple_assign (last_stmt)
1912 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1913 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1914 return NULL;
1916 cond_expr = gimple_assign_rhs1 (last_stmt);
1917 then_clause = gimple_assign_rhs2 (last_stmt);
1918 else_clause = gimple_assign_rhs3 (last_stmt);
1920 if (!COMPARISON_CLASS_P (cond_expr))
1921 return NULL;
1923 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
1924 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
1925 if (comp_vectype == NULL_TREE)
1926 return NULL;
1928 type = gimple_expr_type (last_stmt);
1929 if (types_compatible_p (type, comp_scalar_type)
1930 || ((TREE_CODE (then_clause) != INTEGER_CST
1931 || TREE_CODE (else_clause) != INTEGER_CST)
1932 && !INTEGRAL_TYPE_P (comp_scalar_type))
1933 || !INTEGRAL_TYPE_P (type))
1934 return NULL;
1936 if ((TREE_CODE (then_clause) != INTEGER_CST
1937 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
1938 &def_stmt0, &promotion))
1939 || (TREE_CODE (else_clause) != INTEGER_CST
1940 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
1941 &def_stmt1, &promotion)))
1942 return NULL;
1944 if (orig_type0 && orig_type1
1945 && !types_compatible_p (orig_type0, orig_type1))
1946 return NULL;
1948 if (orig_type0)
1950 if (!types_compatible_p (orig_type0, comp_scalar_type))
1951 return NULL;
1952 then_clause = gimple_assign_rhs1 (def_stmt0);
1953 itype = orig_type0;
1956 if (orig_type1)
1958 if (!types_compatible_p (orig_type1, comp_scalar_type))
1959 return NULL;
1960 else_clause = gimple_assign_rhs1 (def_stmt1);
1961 itype = orig_type1;
1964 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
1966 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1967 return NULL;
1969 vectype = get_vectype_for_scalar_type (type);
1970 if (vectype == NULL_TREE)
1971 return NULL;
1973 if (expand_vec_cond_expr_p (vectype, comp_vectype))
1974 return NULL;
1976 if (itype == NULL_TREE)
1977 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1978 TYPE_UNSIGNED (type));
1980 if (itype == NULL_TREE
1981 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1982 return NULL;
1984 vecitype = get_vectype_for_scalar_type (itype);
1985 if (vecitype == NULL_TREE)
1986 return NULL;
1988 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
1989 return NULL;
1991 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1993 if ((TREE_CODE (then_clause) == INTEGER_CST
1994 && !int_fits_type_p (then_clause, itype))
1995 || (TREE_CODE (else_clause) == INTEGER_CST
1996 && !int_fits_type_p (else_clause, itype)))
1997 return NULL;
2000 def_stmt
2001 = gimple_build_assign_with_ops3 (COND_EXPR,
2002 vect_recog_temp_ssa_var (itype, NULL),
2003 unshare_expr (cond_expr),
2004 fold_convert (itype, then_clause),
2005 fold_convert (itype, else_clause));
2006 pattern_stmt
2007 = gimple_build_assign_with_ops (NOP_EXPR,
2008 vect_recog_temp_ssa_var (type, NULL),
2009 gimple_assign_lhs (def_stmt), NULL_TREE);
2011 new_pattern_def_seq (stmt_vinfo, def_stmt);
2012 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2013 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2014 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2015 *type_in = vecitype;
2016 *type_out = vectype;
2018 if (vect_print_dump_info (REPORT_DETAILS))
2019 fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: ");
2021 return pattern_stmt;
2025 /* Helper function of vect_recog_bool_pattern. Called recursively, returns
2026 true if bool VAR can be optimized that way. */
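/* Roughly, for a chain like (names illustrative)
     bool a_b = x < y;
     bool b_b = p != q;
     bool c_b = a_b & b_b;
   check_bool_pattern (c_b, ...) succeeds only if every def in the chain
   is a single-use internal def and every comparison in it can be
   expressed as a VEC_COND_EXPR the target supports.  */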
2028 static bool
2029 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2031 gimple def_stmt;
2032 enum vect_def_type dt;
2033 tree def, rhs1;
2034 enum tree_code rhs_code;
2036 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2037 &dt))
2038 return false;
2040 if (dt != vect_internal_def)
2041 return false;
2043 if (!is_gimple_assign (def_stmt))
2044 return false;
2046 if (!has_single_use (def))
2047 return false;
2049 rhs1 = gimple_assign_rhs1 (def_stmt);
2050 rhs_code = gimple_assign_rhs_code (def_stmt);
2051 switch (rhs_code)
2053 case SSA_NAME:
2054 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2056 CASE_CONVERT:
2057 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2058 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2059 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2060 return false;
2061 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2063 case BIT_NOT_EXPR:
2064 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2066 case BIT_AND_EXPR:
2067 case BIT_IOR_EXPR:
2068 case BIT_XOR_EXPR:
2069 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2070 return false;
2071 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2072 bb_vinfo);
2074 default:
2075 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2077 tree vecitype, comp_vectype;
2079 /* If the comparison can throw, then is_gimple_condexpr will be
2080 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2081 if (stmt_could_throw_p (def_stmt))
2082 return false;
2084 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2085 if (comp_vectype == NULL_TREE)
2086 return false;
2088 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2090 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2091 tree itype
2092 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2093 vecitype = get_vectype_for_scalar_type (itype);
2094 if (vecitype == NULL_TREE)
2095 return false;
2097 else
2098 vecitype = comp_vectype;
2099 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2101 return false;
2106 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2107 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2108 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
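/* I.e. (names illustrative) if VAR's related pattern stmt was
   tmp1 = x < y ? 1 : 0, that COND_EXPR becomes a pattern def stmt and
   the cast tmp2 = (type) tmp1 becomes the new related stmt.  */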
2110 static tree
2111 adjust_bool_pattern_cast (tree type, tree var)
2113 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2114 gimple cast_stmt, pattern_stmt;
2116 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2117 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2118 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2119 cast_stmt
2120 = gimple_build_assign_with_ops (NOP_EXPR,
2121 vect_recog_temp_ssa_var (type, NULL),
2122 gimple_assign_lhs (pattern_stmt),
2123 NULL_TREE);
2124 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2125 return gimple_assign_lhs (cast_stmt);
2129 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2130 recursively. VAR is an SSA_NAME that should be transformed from bool
2131 to a wider integer type, OUT_TYPE is the desired final integer type of
2132 the whole pattern, and TRUEVAL should be NULL unless optimizing a
2133 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2134 in the then_clause. STMTS is where statements with added pattern stmts
2135 should be pushed to. */
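/* E.g. (illustrative) a_b = x < y on 32-bit ints becomes
   a_i = x < y ? 1 : 0 in a 32-bit unsigned type, and a following
   b_b = ~a_b becomes b_i = a_i ^ 1, so the bool computation is
   rewritten as integer arithmetic of the comparison's width.  */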
2137 static tree
2138 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2139 VEC (gimple, heap) **stmts)
2141 gimple stmt = SSA_NAME_DEF_STMT (var);
2142 enum tree_code rhs_code, def_rhs_code;
2143 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2144 location_t loc;
2145 gimple pattern_stmt, def_stmt;
2147 rhs1 = gimple_assign_rhs1 (stmt);
2148 rhs2 = gimple_assign_rhs2 (stmt);
2149 rhs_code = gimple_assign_rhs_code (stmt);
2150 loc = gimple_location (stmt);
2151 switch (rhs_code)
2153 case SSA_NAME:
2154 CASE_CONVERT:
2155 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2156 itype = TREE_TYPE (irhs1);
2157 pattern_stmt
2158 = gimple_build_assign_with_ops (SSA_NAME,
2159 vect_recog_temp_ssa_var (itype, NULL),
2160 irhs1, NULL_TREE);
2161 break;
2163 case BIT_NOT_EXPR:
2164 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2165 itype = TREE_TYPE (irhs1);
2166 pattern_stmt
2167 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2168 vect_recog_temp_ssa_var (itype, NULL),
2169 irhs1, build_int_cst (itype, 1));
2170 break;
2172 case BIT_AND_EXPR:
2173 /* Try to optimize x = y & (a < b ? 1 : 0); into
2174 x = (a < b ? y : 0);
2176 E.g. for:
2177 bool a_b, b_b, c_b;
2178 TYPE d_T;
2180 S1 a_b = x1 CMP1 y1;
2181 S2 b_b = x2 CMP2 y2;
2182 S3 c_b = a_b & b_b;
2183 S4 d_T = (TYPE) c_b;
2185 we would normally emit:
2187 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2188 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2189 S3' c_T = a_T & b_T;
2190 S4' d_T = c_T;
2192 but we can save one stmt by using the
2193 result of one of the COND_EXPRs in the other COND_EXPR and leave
2194 the BIT_AND_EXPR stmt out:
2196 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2197 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2198 S4' d_T = c_T;
2200 At least when VEC_COND_EXPR is implemented using masks,
2201 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
2202 computes the comparison mask and ANDs it, in one case with an
2203 all-ones vector, in the other case with a vector register.
2204 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
2205 often more expensive. */
2206 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2207 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2208 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2210 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2211 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2212 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2213 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2215 gimple tstmt;
2216 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2217 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2218 tstmt = VEC_pop (gimple, *stmts);
2219 gcc_assert (tstmt == def_stmt);
2220 VEC_quick_push (gimple, *stmts, stmt);
2221 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2222 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2223 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2224 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2225 return irhs2;
2227 else
2228 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2229 goto and_ior_xor;
2231 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2232 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2233 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2235 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2236 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2237 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2238 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2240 gimple tstmt;
2241 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2242 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2243 tstmt = VEC_pop (gimple, *stmts);
2244 gcc_assert (tstmt == def_stmt);
2245 VEC_quick_push (gimple, *stmts, stmt);
2246 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2247 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2248 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2249 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2250 return irhs1;
2252 else
2253 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2254 goto and_ior_xor;
2256 /* FALLTHRU */
2257 case BIT_IOR_EXPR:
2258 case BIT_XOR_EXPR:
2259 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2260 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2261 and_ior_xor:
2262 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2263 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2265 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2266 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2267 int out_prec = TYPE_PRECISION (out_type);
2268 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2269 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2270 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2271 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2272 else
2274 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2275 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
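/* E.g. (illustrative) with a 32-bit OUT_TYPE, a 16-bit irhs1 and an
   8-bit irhs2: |32 - 16| < |32 - 8|, so irhs2 is cast to the 16-bit
   type of irhs1.  */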
2278 itype = TREE_TYPE (irhs1);
2279 pattern_stmt
2280 = gimple_build_assign_with_ops (rhs_code,
2281 vect_recog_temp_ssa_var (itype, NULL),
2282 irhs1, irhs2);
2283 break;
2285 default:
2286 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2287 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2288 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2290 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2291 itype
2292 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2294 else
2295 itype = TREE_TYPE (rhs1);
2296 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2297 if (trueval == NULL_TREE)
2298 trueval = build_int_cst (itype, 1);
2299 else
2300 gcc_checking_assert (useless_type_conversion_p (itype,
2301 TREE_TYPE (trueval)));
2302 pattern_stmt
2303 = gimple_build_assign_with_ops3 (COND_EXPR,
2304 vect_recog_temp_ssa_var (itype, NULL),
2305 cond_expr, trueval,
2306 build_int_cst (itype, 0));
2307 break;
2310 VEC_safe_push (gimple, heap, *stmts, stmt);
2311 gimple_set_location (pattern_stmt, loc);
2312 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2313 return gimple_assign_lhs (pattern_stmt);
2317 /* Function vect_recog_bool_pattern
2319 Try to find a pattern like the following:
2321 bool a_b, b_b, c_b, d_b, e_b;
2322 TYPE f_T;
2323 loop:
2324 S1 a_b = x1 CMP1 y1;
2325 S2 b_b = x2 CMP2 y2;
2326 S3 c_b = a_b & b_b;
2327 S4 d_b = x3 CMP3 y3;
2328 S5 e_b = c_b | d_b;
2329 S6 f_T = (TYPE) e_b;
2331 where type 'TYPE' is an integral type.
2333 Input:
2335 * LAST_STMT: A stmt at the end from which the pattern
2336 search begins, i.e. a cast of a bool to
2337 an integer type.
2339 Output:
2341 * TYPE_IN: The type of the input arguments to the pattern.
2343 * TYPE_OUT: The type of the output of this pattern.
2345 * Return value: A new stmt that will be used to replace the pattern.
2347 Assuming the size of TYPE is the same as the size of all comparisons
2348 (otherwise some casts would be added where needed), for the above
2349 sequence we create the following related pattern stmts:
2350 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2351 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2352 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2353 S5' e_T = c_T | d_T;
2354 S6' f_T = e_T;
2356 Instead of the above S3' we could emit:
2357 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2358 S3' c_T = a_T | b_T;
2359 but the above is more efficient. */
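/* In C terms, a loop like the following (names illustrative) roughly
   gives rise to the S1..S6 sequence above once the comparisons are
   gimplified into bool temporaries:
     for (i = 0; i < n; i++)
       f[i] = ((x1[i] < y1[i]) & (x2[i] < y2[i])) | (x3[i] < y3[i]);
   with f an array of a wider integral type such as int.  */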
2361 static gimple
2362 vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2363 tree *type_out)
2365 gimple last_stmt = VEC_pop (gimple, *stmts);
2366 enum tree_code rhs_code;
2367 tree var, lhs, rhs, vectype;
2368 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2369 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2370 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2371 gimple pattern_stmt;
2373 if (!is_gimple_assign (last_stmt))
2374 return NULL;
2376 var = gimple_assign_rhs1 (last_stmt);
2377 lhs = gimple_assign_lhs (last_stmt);
2379 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2380 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2381 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2382 return NULL;
2384 rhs_code = gimple_assign_rhs_code (last_stmt);
2385 if (CONVERT_EXPR_CODE_P (rhs_code))
2387 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2388 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2389 return NULL;
2390 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2391 if (vectype == NULL_TREE)
2392 return NULL;
2394 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2395 return NULL;
2397 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2398 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2399 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2400 pattern_stmt
2401 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2402 else
2403 pattern_stmt
2404 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2405 *type_out = vectype;
2406 *type_in = vectype;
2407 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2408 if (vect_print_dump_info (REPORT_DETAILS))
2409 fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
2411 return pattern_stmt;
2413 else if (rhs_code == SSA_NAME
2414 && STMT_VINFO_DATA_REF (stmt_vinfo))
2416 stmt_vec_info pattern_stmt_info;
2417 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2418 gcc_assert (vectype != NULL_TREE);
2419 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2420 return NULL;
2421 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2422 return NULL;
2424 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2425 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2426 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2428 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2429 gimple cast_stmt
2430 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2431 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2432 rhs = rhs2;
2434 pattern_stmt
2435 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2436 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2437 bb_vinfo);
2438 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2439 STMT_VINFO_DATA_REF (pattern_stmt_info)
2440 = STMT_VINFO_DATA_REF (stmt_vinfo);
2441 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2442 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2443 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2444 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2445 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2446 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2447 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2448 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2449 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2450 *type_out = vectype;
2451 *type_in = vectype;
2452 VEC_safe_push (gimple, heap, *stmts, last_stmt);
2453 if (vect_print_dump_info (REPORT_DETAILS))
2454 fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
2455 return pattern_stmt;
2457 else
2458 return NULL;
2462 /* Mark statements that are involved in a pattern. */
2464 static inline void
2465 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2466 tree pattern_vectype)
2468 stmt_vec_info pattern_stmt_info, def_stmt_info;
2469 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2470 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2471 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2472 gimple def_stmt;
2474 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2475 if (pattern_stmt_info == NULL)
2477 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2478 bb_vinfo);
2479 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2481 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2483 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2484 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2485 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2486 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2487 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2488 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2489 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2490 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2491 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2493 gimple_stmt_iterator si;
2494 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2495 !gsi_end_p (si); gsi_next (&si))
2497 def_stmt = gsi_stmt (si);
2498 def_stmt_info = vinfo_for_stmt (def_stmt);
2499 if (def_stmt_info == NULL)
2501 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2502 bb_vinfo);
2503 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2505 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2506 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2507 STMT_VINFO_DEF_TYPE (def_stmt_info)
2508 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2509 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2510 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2515 /* Function vect_pattern_recog_1
2517 Input:
2518 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2519 computation pattern.
2520 STMT: A stmt from which the pattern search should start.
2522 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2523 expression that computes the same functionality and can be used to
2524 replace the sequence of stmts that are involved in the pattern.
2526 Output:
2527 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2528 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2529 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2530 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2531 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2532 the available target pattern.
2534 This function also does some bookkeeping, as explained in the documentation
2535 for vect_pattern_recog. */
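/* E.g. (illustrative) if the recog function returned a scalar TYPE_IN
   such as short int, the code below maps it to a vector type, looks up
   the optab for the pattern stmt's code in that vector mode, and gives
   up unless optab_handler finds an insn whose result mode matches
   TYPE_OUT's vector mode.  */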
2537 static void
2538 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2539 gimple_stmt_iterator si,
2540 VEC (gimple, heap) **stmts_to_replace)
2542 gimple stmt = gsi_stmt (si), pattern_stmt;
2543 stmt_vec_info stmt_info;
2544 loop_vec_info loop_vinfo;
2545 tree pattern_vectype;
2546 tree type_in, type_out;
2547 enum tree_code code;
2548 int i;
2549 gimple next;
2551 VEC_truncate (gimple, *stmts_to_replace, 0);
2552 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2553 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2554 if (!pattern_stmt)
2555 return;
2557 stmt = VEC_last (gimple, *stmts_to_replace);
2558 stmt_info = vinfo_for_stmt (stmt);
2559 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2561 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2563 /* No need to check target support (already checked by the pattern
2564 recognition function). */
2565 pattern_vectype = type_out ? type_out : type_in;
2567 else
2569 enum machine_mode vec_mode;
2570 enum insn_code icode;
2571 optab optab;
2573 /* Check target support */
2574 type_in = get_vectype_for_scalar_type (type_in);
2575 if (!type_in)
2576 return;
2577 if (type_out)
2578 type_out = get_vectype_for_scalar_type (type_out);
2579 else
2580 type_out = type_in;
2581 if (!type_out)
2582 return;
2583 pattern_vectype = type_out;
2585 if (is_gimple_assign (pattern_stmt))
2586 code = gimple_assign_rhs_code (pattern_stmt);
2587 else
2589 gcc_assert (is_gimple_call (pattern_stmt));
2590 code = CALL_EXPR;
2593 optab = optab_for_tree_code (code, type_in, optab_default);
2594 vec_mode = TYPE_MODE (type_in);
2595 if (!optab
2596 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2597 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2598 return;
2601 /* Found a vectorizable pattern. */
2602 if (vect_print_dump_info (REPORT_DETAILS))
2604 fprintf (vect_dump, "pattern recognized: ");
2605 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2608 /* Mark the stmts that are involved in the pattern. */
2609 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2611 /* Patterns cannot be vectorized using SLP, because they change the order of
2612 computation. */
2613 if (loop_vinfo)
2614 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2615 if (next == stmt)
2616 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
2618 /* It is possible that additional pattern stmts are created and inserted in
2619 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2620 relevant statements. */
2621 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2622 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
2623 i++)
2625 stmt_info = vinfo_for_stmt (stmt);
2626 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2627 if (vect_print_dump_info (REPORT_DETAILS))
2629 fprintf (vect_dump, "additional pattern stmt: ");
2630 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2633 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2638 /* Function vect_pattern_recog
2640 Input:
2641 LOOP_VINFO - the loop_vec_info of a loop in which we want to look for
2642 computation idioms.
2644 Output - for each computation idiom that is detected we create a new stmt
2645 that provides the same functionality and that can be vectorized. We
2646 also record some information in the stmt_vec_info of the relevant
2647 stmts, as explained below:
2649 At the entry to this function we have the following stmts, with the
2650 following initial value in the STMT_VINFO fields:
2652 stmt in_pattern_p related_stmt vec_stmt
2653 S1: a_i = .... - - -
2654 S2: a_2 = ..use(a_i).. - - -
2655 S3: a_1 = ..use(a_2).. - - -
2656 S4: a_0 = ..use(a_1).. - - -
2657 S5: ... = ..use(a_0).. - - -
2659 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2660 represented by a single stmt. We then:
2661 - create a new stmt S6 equivalent to the pattern (the stmt is not
2662 inserted into the code)
2663 - fill in the STMT_VINFO fields as follows:
2665 in_pattern_p related_stmt vec_stmt
2666 S1: a_i = .... - - -
2667 S2: a_2 = ..use(a_i).. - - -
2668 S3: a_1 = ..use(a_2).. - - -
2669 S4: a_0 = ..use(a_1).. true S6 -
2670 '---> S6: a_new = .... - S4 -
2671 S5: ... = ..use(a_0).. - - -
2673 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2674 to each other through the RELATED_STMT field).
2676 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2677 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2678 remain irrelevant unless used by stmts other than S4.
2680 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2681 (because they are marked as irrelevant). It will vectorize S6, and record
2682 a pointer to the new vector stmt VS6 from S6 (as usual).
2683 S4 will be skipped, and S5 will be vectorized as usual:
2685 in_pattern_p related_stmt vec_stmt
2686 S1: a_i = .... - - -
2687 S2: a_2 = ..use(a_i).. - - -
2688 S3: a_1 = ..use(a_2).. - - -
2689 > VS6: va_new = .... - - -
2690 S4: a_0 = ..use(a_1).. true S6 VS6
2691 '---> S6: a_new = .... - S4 VS6
2692 > VS5: ... = ..vuse(va_new).. - - -
2693 S5: ... = ..use(a_0).. - - -
2695 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2696 elsewhere), and we'll end up with:
2698 VS6: va_new = ....
2699 VS5: ... = ..vuse(va_new)..
2701 In case of more than one pattern statement, e.g., widen-mult with
2702 intermediate type:
2704 S1 a_t = ;
2705 S2 a_T = (TYPE) a_t;
2706 '--> S3: a_it = (interm_type) a_t;
2707 S4 prod_T = a_T * CONST;
2708 '--> S5: prod_T' = a_it w* CONST;
2710 there may be other users of a_T outside the pattern. In that case S2 will
2711 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2712 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2713 be recorded in S3. */
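/* E.g. (types illustrative) with
     char a_t;
     int prod_T = (int) a_t * 3;
   interm_type could be short, giving S3: a_it = (short) a_t and
   S5: prod_T' = a_it w* 3.  */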
2715 void
2716 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2718 struct loop *loop;
2719 basic_block *bbs, bb;
2720 unsigned int nbbs;
2721 gimple_stmt_iterator si;
2722 unsigned int i, j;
2723 vect_recog_func_ptr vect_recog_func;
2724 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
2725 gimple stmt;
2727 if (vect_print_dump_info (REPORT_DETAILS))
2728 fprintf (vect_dump, "=== vect_pattern_recog ===");
2730 if (loop_vinfo)
2732 loop = LOOP_VINFO_LOOP (loop_vinfo);
2733 bbs = LOOP_VINFO_BBS (loop_vinfo);
2734 nbbs = loop->num_nodes;
2736 else
2738 bb = BB_VINFO_BB (bb_vinfo);
2739 nbbs = 1;
2740 bbs = XNEW (basic_block);
2741 bbs[0] = bb;
2744 /* Scan through the loop stmts, applying the pattern recognition
2745 functions starting at each stmt visited: */
2746 for (i = 0; i < nbbs; i++)
2748 basic_block bb = bbs[i];
2749 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2751 if (bb_vinfo && (stmt = gsi_stmt (si))
2752 && vinfo_for_stmt (stmt)
2753 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
2754 continue;
2756 /* Scan over all generic vect_recog_xxx_pattern functions. */
2757 for (j = 0; j < NUM_PATTERNS; j++)
2759 vect_recog_func = vect_vect_recog_func_ptrs[j];
2760 vect_pattern_recog_1 (vect_recog_func, si,
2761 &stmts_to_replace);
2766 VEC_free (gimple, heap, stmts_to_replace);
2767 if (bb_vinfo)
2768 free (bbs);