/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "cfgloop.h"
33 #include "expr.h"
34 #include "optabs.h"
35 #include "params.h"
36 #include "tree-data-ref.h"
37 #include "tree-vectorizer.h"
38 #include "recog.h" /* FIXME: for insn_data */
39 #include "diagnostic-core.h"
40 #include "dumpfile.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
					      tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (VEC (gimple, heap) **,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
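
/* A minimal source-level illustration of the two shapes the function above
   distinguishes (an editorial sketch; the names are made up and nothing in
   this file refers to them).  */

static void
type_conversion_p_example (short s, int j)
{
  int i = s;            /* NAME = NOP (name0): int is at least twice as wide
                           as short, so *PROMOTION would be set to true.  */
  short t = (short) j;  /* The opposite direction is a demotion:
                           *PROMOTION would be set to false.  */
  (void) i;
  (void) t;
}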
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
   or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
          && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd0, stmt, true, &half_type1, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
						oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
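
/* A scalar source loop matching the dot-product idiom above, with 'short'
   in the role of 'type' and 'int' in the role of TYPE1/TYPE2 (an editorial
   sketch, not used by the vectorizer itself).  The C integer promotions
   produce exactly S3-S5, and the accumulation is S7, so the loop body is
   replaced by DOT_PROD_EXPR <x[i], y[i], sum>.  */

static int
dot_prod_example (short *x, short *y, int n)
{
  int i, sum = 0;
  for (i = 0; i < n; i++)
    sum += x[i] * y[i];  /* S3/S4: widen both operands; S5: multiply;
                            S7: accumulate.  */
  return sum;
}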
/* Handle widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       VEC (gimple, heap) **stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
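
/* An illustration of the intermediate-type case handled above (an editorial
   sketch).  'signed char' is 'type', 'int' is TYPE (4 times bigger), and the
   constant 10 fits the 2-times-smaller intermediate type 'short', so the
   promotion char->int can be replaced by char->short and the multiplication
   widened from short to int.  */

static void
widen_op_by_const_example (signed char *a, int *out, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = a[i] * 10;  /* a_T = (TYPE) a_t;  prod_T = a_T * CONST;  */
}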
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/

static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy, &dummy, &dummy_code,
					  &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
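
/* A scalar loop matching the widen-mult idiom (an editorial sketch): both
   short operands are promoted before an int multiplication, which becomes
   WIDEN_MULT_EXPR <a[i], b[i]> when the target supports it.  */

static void
widen_mult_example (short *a, short *b, int *c, int n)
{
  int i;
  for (i = 0; i < n; i++)
    c[i] = a[i] * b[i];  /* S3: a_T = (TYPE) a_t;  S4: b_T = (TYPE) b_t;
                            S5: prod_T = a_T * b_T;  */
}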
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/

static gimple
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
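
/* The two source forms this pattern rewrites (an editorial sketch;
   __builtin_pow is used so the example needs no header).  The first call is
   caught by the squaring check and becomes y * y; the second matches
   dconsthalf and becomes a call to sqrt, which is vectorizable if the
   target provides a vector sqrt.  */

static double
pow_pattern_example (double y)
{
  return __builtin_pow (y, 2.0)    /* -> y * y */
	 + __builtin_pow (y, 0.5); /* -> sqrt (y) */
}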
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
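
/* A reduction matching the widen-sum idiom (an editorial sketch): each
   short element is promoted (S2) before being added to the int accumulator
   (S3), so the body becomes WIDEN_SUM <x[i], sum>.  */

static int
widen_sum_example (short *x, int n)
{
  int i, sum = 0;
  for (i = 0; i < n; i++)
    sum += x[i];  /* S2: x_T = (TYPE) x_t;  S3: sum_1 = x_T + sum_0;  */
  return sum;
}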
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
           (e.g., if the original value was char, the shift amount is at most
           8 if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We
           store this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */

static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}
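
/* A source loop that is over-widened in the IL (an editorial sketch).  The
   C integer promotions force the shift and the mask to be computed in int,
   but both constants fit 'unsigned char', so vect_operation_fits_smaller_type
   accepts S3/S4 in the narrow type and the store provides the demotion S5.  */

static void
over_widening_example (unsigned char *in, unsigned char *out, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = (in[i] >> 2) & 0x15;  /* S2-S5 with op = >> and op = &.  */
}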
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with
    S4 in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */

static gimple
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) * dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy, &dummy, &dummy_code,
					  &dummy_code, &dummy_int,
					  &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
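
/* A loop matching the widen-shift idiom (an editorial sketch): the short
   operand is promoted (S2) before the shift (S3), giving
   WIDEN_LSHIFT_EXPR <a[i], 3> on targets that support it.  */

static void
widen_shift_example (short *a, int *res, int n)
{
  int i;
  for (i = 0; i < n; i++)
    res[i] = a[i] << 3;  /* S2: a_T = (TYPE) a_t;  S3: res_T = a_T << 3;  */
}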
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

  type a_t;
  TYPE b_T, c_T, res_T;

  S0 c_T = ;
  S1 a_t = (type) c_T;
  S2 b_T = ;
  S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */

static gimple
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
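
/* A shift whose amount has a different type than the shifted operand (an
   editorial sketch).  In GIMPLE the shift amount keeps its own (int) type
   while b_T is long long, so the pattern adds a cast of the amount and
   emits a shift with matching operand types.  */

static void
vector_vector_shift_example (long long *b, int *amnt, int n)
{
  int i;
  for (i = 0; i < n; i++)
    b[i] = b[i] << amnt[i];  /* S3: res_T = b_T op a_t, with TYPE wider
                                than type.  */
}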
1593 /* Detect a signed division by a constant that wouldn't be
1594 otherwise vectorized:
1596 type a_t, b_t;
1598 S1 a_t = b_t / N;
1600 where type 'type' is an integral type and N is a constant.
1602 Similarly handle modulo by a constant:
1604 S4 a_t = b_t % N;
1606 Input/Output:
1608 * STMTS: Contains a stmt from which the pattern search begins,
1609 i.e. the division stmt. S1 is replaced by if N is a power
1610 of two constant and type is signed:
1611 S3 y_t = b_t < 0 ? N - 1 : 0;
1612 S2 x_t = b_t + y_t;
1613 S1' a_t = x_t >> log2 (N);
1615 S4 is replaced if N is a power of two constant and
1616 type is signed by (where *_T temporaries have unsigned type):
1617 S9 y_T = b_t < 0 ? -1U : 0U;
1618 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1619 S7 z_t = (type) z_T;
1620 S6 w_t = b_t + z_t;
1621 S5 x_t = w_t & (N - 1);
1622 S4' a_t = x_t - z_t;
1624 Output:
1626 * TYPE_IN: The type of the input arguments to the pattern.
1628 * TYPE_OUT: The type of the output of this pattern.
1630 * Return value: A new stmt that will be used to replace the division
1631 S1 or modulo S4 stmt. */
1633 static gimple
1634 vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
1635 tree *type_in, tree *type_out)
1637 gimple last_stmt = VEC_pop (gimple, *stmts);
1638 tree oprnd0, oprnd1, vectype, itype, cond;
1639 gimple pattern_stmt, def_stmt;
1640 enum tree_code rhs_code;
1641 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1642 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1643 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1644 optab optab;
1645 tree q;
1646 int dummy_int, prec;
1647 stmt_vec_info def_stmt_vinfo;
1649 if (!is_gimple_assign (last_stmt))
1650 return NULL;
1652 rhs_code = gimple_assign_rhs_code (last_stmt);
1653 switch (rhs_code)
1655 case TRUNC_DIV_EXPR:
1656 case TRUNC_MOD_EXPR:
1657 break;
1658 default:
1659 return NULL;
1662 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1663 return NULL;
1665 oprnd0 = gimple_assign_rhs1 (last_stmt);
1666 oprnd1 = gimple_assign_rhs2 (last_stmt);
1667 itype = TREE_TYPE (oprnd0);
1668 if (TREE_CODE (oprnd0) != SSA_NAME
1669 || TREE_CODE (oprnd1) != INTEGER_CST
1670 || TREE_CODE (itype) != INTEGER_TYPE
1671 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
1672 return NULL;
1674 vectype = get_vectype_for_scalar_type (itype);
1675 if (vectype == NULL_TREE)
1676 return NULL;
1678 /* If the target can handle vectorized division or modulo natively,
1679 don't attempt to optimize this. */
1680 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1681 if (optab != NULL)
1683 enum machine_mode vec_mode = TYPE_MODE (vectype);
1684 int icode = (int) optab_handler (optab, vec_mode);
1685 if (icode != CODE_FOR_nothing
1686 || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
1687 return NULL;
1690 prec = TYPE_PRECISION (itype);
1691 if (integer_pow2p (oprnd1))
1693 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1694 return NULL;
1696 /* Pattern detected. */
1697 if (vect_print_dump_info (REPORT_DETAILS))
1698 fprintf (vect_dump, "vect_recog_divmod_pattern: detected: ");
1700 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1701 build_int_cst (itype, 0));
1702 if (rhs_code == TRUNC_DIV_EXPR)
1704 tree var = vect_recog_temp_ssa_var (itype, NULL);
1705 tree shift;
1706 def_stmt
1707 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1708 fold_build2 (MINUS_EXPR, itype,
1709 oprnd1,
1710 build_int_cst (itype,
1711 1)),
1712 build_int_cst (itype, 0));
1713 new_pattern_def_seq (stmt_vinfo, def_stmt);
1714 var = vect_recog_temp_ssa_var (itype, NULL);
1715 def_stmt
1716 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1717 gimple_assign_lhs (def_stmt));
1718 append_pattern_def_seq (stmt_vinfo, def_stmt);
1720 shift = build_int_cst (itype, tree_log2 (oprnd1));
1721 pattern_stmt
1722 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1723 vect_recog_temp_ssa_var (itype,
1724 NULL),
1725 var, shift);
1727 else
1729 tree signmask;
1730 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1731 if (compare_tree_int (oprnd1, 2) == 0)
1733 signmask = vect_recog_temp_ssa_var (itype, NULL);
1734 def_stmt
1735 = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
1736 build_int_cst (itype, 1),
1737 build_int_cst (itype, 0));
1738 append_pattern_def_seq (stmt_vinfo, def_stmt);
1740 else
1742 tree utype
1743 = build_nonstandard_integer_type (prec, 1);
1744 tree vecutype = get_vectype_for_scalar_type (utype);
1745 tree shift
1746 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1747 - tree_log2 (oprnd1));
1748 tree var = vect_recog_temp_ssa_var (utype, NULL);
1750 def_stmt
1751 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1752 build_int_cst (utype, -1),
1753 build_int_cst (utype, 0));
1754 def_stmt_vinfo
1755 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1756 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1757 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1758 append_pattern_def_seq (stmt_vinfo, def_stmt);
1759 var = vect_recog_temp_ssa_var (utype, NULL);
1760 def_stmt
1761 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1762 gimple_assign_lhs (def_stmt),
1763 shift);
1764 def_stmt_vinfo
1765 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1766 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1767 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1768 append_pattern_def_seq (stmt_vinfo, def_stmt);
1769 signmask = vect_recog_temp_ssa_var (itype, NULL);
1770 def_stmt
1771 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1772 NULL_TREE);
1773 append_pattern_def_seq (stmt_vinfo, def_stmt);
1775 def_stmt
1776 = gimple_build_assign_with_ops (PLUS_EXPR,
1777 vect_recog_temp_ssa_var (itype,
1778 NULL),
1779 oprnd0, signmask);
1780 append_pattern_def_seq (stmt_vinfo, def_stmt);
1781 def_stmt
1782 = gimple_build_assign_with_ops (BIT_AND_EXPR,
1783 vect_recog_temp_ssa_var (itype,
1784 NULL),
1785 gimple_assign_lhs (def_stmt),
1786 fold_build2 (MINUS_EXPR, itype,
1787 oprnd1,
1788 build_int_cst (itype,
1789 1)));
1790 append_pattern_def_seq (stmt_vinfo, def_stmt);
1792 pattern_stmt
1793 = gimple_build_assign_with_ops (MINUS_EXPR,
1794 vect_recog_temp_ssa_var (itype,
1795 NULL),
1796 gimple_assign_lhs (def_stmt),
1797 signmask);
1800 if (vect_print_dump_info (REPORT_DETAILS))
1801 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1803 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1805 *type_in = vectype;
1806 *type_out = vectype;
1807 return pattern_stmt;
1810 if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
1811 || integer_zerop (oprnd1)
1812 || prec > HOST_BITS_PER_WIDE_INT)
1813 return NULL;
1815 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
1816 return NULL;
1818 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1820 if (TYPE_UNSIGNED (itype))
1822 unsigned HOST_WIDE_INT mh, ml;
1823 int pre_shift, post_shift;
1824 unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
1825 & GET_MODE_MASK (TYPE_MODE (itype));
1826 tree t1, t2, t3, t4;
1828 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
1829 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
1830 return NULL;
1832 /* Find a suitable multiplier and right shift count
1833 instead of multiplying with D. */
1834 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
1836 /* If the suggested multiplier is more than SIZE bits, we can do better
1837 for even divisors, using an initial right shift. */
1838 if (mh != 0 && (d & 1) == 0)
1840 pre_shift = floor_log2 (d & -d);
1841 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
1842 &ml, &post_shift, &dummy_int);
1843 gcc_assert (!mh);
1845 else
1846 pre_shift = 0;
1848 if (mh != 0)
1850 if (post_shift - 1 >= prec)
1851 return NULL;
1853 /* t1 = oprnd0 h* ml;
1854 t2 = oprnd0 - t1;
1855 t3 = t2 >> 1;
1856 t4 = t1 + t3;
1857 q = t4 >> (post_shift - 1); */
	  t1 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
					    build_int_cst (itype, ml));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
					    integer_one_node);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);

	  if (post_shift != 1)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      pattern_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
						build_int_cst (itype,
							       post_shift
							       - 1));
	    }
	  else
	    {
	      q = t4;
	      pattern_stmt = def_stmt;
	    }
	}
      else
	{
	  if (pre_shift >= prec || post_shift >= prec)
	    return NULL;

	  /* t1 = oprnd0 >> pre_shift;
	     t2 = t1 h* ml;
	     q = t2 >> post_shift;  */
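	  /* For example (illustrative, assuming prec == 32): for d == 10,
	     choose_multiplier yields ml == 0xcccccccd with post_shift == 3
	     and no pre-shift, so the sequence is simply
	       q = ((n * 0xcccccccd) >> 32) >> 3;
	     For an even divisor whose multiplier would not fit in prec
	     bits (e.g. d == 14), PRE_SHIFT is the count of trailing zero
	     bits of d; shifting oprnd0 right by it first guarantees a
	     prec-bit multiplier exists (hence the gcc_assert above).  */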
	  if (pre_shift)
	    {
	      t1 = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
						build_int_cst (NULL,
							       pre_shift));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    t1 = oprnd0;

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
					    build_int_cst (itype, ml));

	  if (post_shift)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
						build_int_cst (itype,
							       post_shift));
	    }
	  else
	    q = t2;

	  pattern_stmt = def_stmt;
	}
    }
  else
    {
      unsigned HOST_WIDE_INT ml;
      int post_shift;
      HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
      unsigned HOST_WIDE_INT abs_d;
      bool add = false;
      tree t1, t2, t3, t4;

      /* Give up for -1.  */
      if (d == -1)
	return NULL;

      /* Since d might be INT_MIN, we have to cast to
	 unsigned HOST_WIDE_INT before negating to avoid
	 undefined signed overflow.  */
      abs_d = (d >= 0
	       ? (unsigned HOST_WIDE_INT) d
	       : - (unsigned HOST_WIDE_INT) d);

      /* n rem d = n rem -d */
      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
	{
	  d = abs_d;
	  oprnd1 = build_int_cst (itype, abs_d);
	}
      else if (HOST_BITS_PER_WIDE_INT >= prec
	       && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	/* This case is not handled correctly below.  */
	return NULL;

      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	{
	  add = true;
	  ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	}
      if (post_shift >= prec)
	return NULL;

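      /* For example (illustrative, assuming prec == 32): for d == 3,
	 choose_multiplier yields ml == 0x55555556 with post_shift == 0 and
	 ADD false, so the stmts built below compute
	   q = ((n * 0x55555556) >> 32) - (n >> 31);
	 t4 = n >> 31 is -1 exactly for negative n, so subtracting it adds
	 back the 1 needed to turn the floor-style highpart result into the
	 truncating division C requires.  */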
      /* t1 = oprnd0 h* ml;  */
      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
					build_int_cst (itype, ml));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      if (add)
	{
	  /* t2 = t1 + oprnd0;  */
	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
      else
	t2 = t1;

      if (post_shift)
	{
	  /* t3 = t2 >> post_shift;  */
	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
					    build_int_cst (itype, post_shift));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
      else
	t3 = t2;

      /* t4 = oprnd0 >> (prec - 1);  */
      t4 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
					build_int_cst (itype, prec - 1));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      /* q = t3 - t4;  or q = t4 - t3;  */
      q = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
					d < 0 ? t3 : t4);
    }

  if (rhs_code == TRUNC_MOD_EXPR)
    {
      tree r, t1;

      /* We divided.  Now finish by:
	 t1 = q * oprnd1;
	 r = oprnd0 - t1;  */
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);

      t1 = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
	= gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      r = vect_recog_temp_ssa_var (itype, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
    }

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_divmod_pattern: detected: ");

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);

  *type_in = vectype;
  *type_out = vectype;
  return pattern_stmt;
}

/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
	Additionally a def_stmt is added.

	a_it = x_t CMP y_t ? b_it : c_it;
	a_T = (TYPE) a_it;  */

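/* A concrete instance (illustrative): with 'short' operands in the
   comparison and an 'int' result,

     a_T = x_t < y_t ? 3 : 5;

   becomes

     a_it = x_t < y_t ? 3 : 5;	(a_it in a 16-bit integer type)
     a_T = (int) a_it;

   so the VEC_COND_EXPR is done on vectors whose element size matches the
   comparison, and only the widening conversion happens in the wider
   type.  */
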
static gimple
vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
				    tree *type_out)
{
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple def_stmt0 = NULL, def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
	   || TREE_CODE (else_clause) != INTEGER_CST)
	  && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
			      &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
	  && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
				 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
	return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
	return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
					    TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
	   && !int_fits_type_p (then_clause, itype))
	  || (TREE_CODE (else_clause) == INTEGER_CST
	      && !int_fits_type_p (else_clause, itype)))
	return NULL;
    }

  def_stmt
    = gimple_build_assign_with_ops3 (COND_EXPR,
				     vect_recog_temp_ssa_var (itype, NULL),
				     unshare_expr (cond_expr),
				     fold_convert (itype, then_clause),
				     fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: ");

  return pattern_stmt;
}

/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
			   &dt))
    return false;

  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    CASE_CONVERT:
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
	return false;
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
	return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
				 bb_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  /* If the comparison can throw, then is_gimple_condexpr will be
	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
	  if (stmt_could_throw_p (def_stmt))
	    return false;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
	}
      return false;
    }
}

/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

static tree
adjust_bool_pattern_cast (tree type, tree var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (pattern_stmt),
				    NULL_TREE);
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
}

/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  */

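/* For example (illustrative): in

     r_b = (int_var < 0) & (short_var < 0);

   the two comparisons are rewritten as COND_EXPRs producing a 32-bit and
   a 16-bit integer respectively, so before the BIT_AND_EXPR can be emitted
   one operand has to be cast (via adjust_bool_pattern_cast) to the operand
   precision whose distance from OUT_TYPE's precision is smaller.  */
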
static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
		     VEC (gimple, heap) **stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, NULL_TREE);
      break;

    case BIT_NOT_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 the BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  d_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks
	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
	 computes the comparison mask and ands it, in one case with
	 an all ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
	      tstmt = VEC_pop (gimple, *stmts);
	      gcc_assert (tstmt == def_stmt);
	      VEC_quick_push (gimple, *stmts, stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs2;
	    }
	  else
	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
	      tstmt = VEC_pop (gimple, *stmts);
	      gcc_assert (tstmt == def_stmt);
	      VEC_quick_push (gimple, *stmts, stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs1;
	    }
	  else
	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (rhs_code,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || (TYPE_PRECISION (TREE_TYPE (rhs1))
	      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign_with_ops3 (COND_EXPR,
					 vect_recog_temp_ssa_var (itype, NULL),
					 cond_expr, trueval,
					 build_int_cst (itype, 0));
      break;
    }

  VEC_safe_push (gimple, heap, *stmts, stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}

/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming the size of TYPE is the same as the size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T & b_T;
	but the above is more efficient.  */

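/* Note: besides the conversion shape shown above, the function below also
   recognizes a bool value stored directly to memory (a LAST_STMT with a
   data reference).  In that case the COND_EXPR chain is built in an integer
   type of the same width as the stored element, and the store is rewritten
   through a VIEW_CONVERT_EXPR of its lhs.  */
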
static gimple
vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			 tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt
	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      else
	pattern_stmt
	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;
      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple cast_stmt
	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
      return pattern_stmt;
    }
  else
    return NULL;
}

/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
						 bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info)
	    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support has already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits the available target pattern.

   This function also does some bookkeeping, as explained in the
   documentation for vect_pattern_recog.  */

static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      VEC (gimple, heap) **stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  VEC_truncate (gimple, *stmts_to_replace, 0);
  VEC_quick_push (gimple, *stmts_to_replace, stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = VEC_last (gimple, *stmts_to_replace);
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "pattern recognized: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
	VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
	      && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "additional pattern stmt: ");
	  print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}

/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              -
	       '---> S6: a_new = ....   -       S4              -
	 S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 > VS6: va_new = ....           -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              VS6
	       '---> S6: a_new = ....   -       S4              VS6
	 > VS5: ... = ..vuse(va_new)..  -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
  gimple stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_pattern_recog ===");

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;
    }
  else
    {
      bbs = &BB_VINFO_BB (bb_vinfo);
      nbbs = 1;
    }

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (bb_vinfo && (stmt = gsi_stmt (si))
	      && vinfo_for_stmt (stmt)
	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    {
	      vect_recog_func = vect_vect_recog_func_ptrs[j];
	      vect_pattern_recog_1 (vect_recog_func, si,
				    &stmts_to_replace);
	    }
	}
    }

  VEC_free (gimple, heap, stmts_to_replace);
}