/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
					      tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion or demotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
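
/* As an illustrative sketch (not part of the original sources): with
   'type' = signed char and 'TYPE1' = 'TYPE2' = int, the canonical
   source loop matched by this pattern is

       signed char x[N], y[N];
       int sum = 0;
       for (i = 0; i < N; i++)
	 sum += (int) x[i] * (int) y[i];

   which, once recognized, can be vectorized with a single
   DOT_PROD_EXPR per vector of inputs.  */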

static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
          DPROD = X w* Y;          #widen mult
          sum_1 = DPROD w+ sum_0;  #widen summation
     or
          DPROD = X w* Y;          #widen mult
          sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
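
/* An illustrative sketch (not from the original sources) of the
   intermediate-type case: with HALF_TYPE = char, TYPE = int (4 times
   wider) and CONST_OPRND = 10, which fits in short,

       int prod = (int) a_char * 10;

   is rewritten so that a_char is promoted only to short, and the
   multiplication becomes a short w* short -> int widening operation
   on the intermediate type.  */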

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
	  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
	  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
	return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}

/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/
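
/* An illustrative sketch (not from the original sources): with
   'type' = short and 'TYPE' = int, the basic form of this pattern is
   matched by

       short a[N], b[N];
       int prod[N];
       for (i = 0; i < N; i++)
	 prod[i] = (int) a[i] * (int) b[i];

   which can then be vectorized using WIDEN_MULT_EXPR instead of two
   promotions and a full-width multiplication.  */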

static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	half_type1 = half_type0;
      else
	return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
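
/* For example (an illustrative sketch, not from the original sources):

       y = pow (x, 2.0);   -->   y = x * x;
       y = pow (x, 0.5);   -->   y = sqrt (x);

   The sqrt form is only used when the target provides a vectorizable
   sqrt for the operand type.  */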

static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}

/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
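
/* An illustrative sketch (not from the original sources): with
   'type' = unsigned char and 'TYPE' = unsigned int, this pattern is
   matched by

       unsigned char x[N];
       unsigned int sum = 0;
       for (i = 0; i < N; i++)
	 sum += (unsigned int) x[i];

   which can then be vectorized using WIDEN_SUM_EXPR.  */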

static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type, NULL);
	      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       oprnd, NULL_TREE);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type, NULL);
	  new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						   oprnd, NULL_TREE);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
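
/* An illustrative sketch (not from the original sources): because C
   integer promotion rules widen the operands, a loop such as

       unsigned char a[N], b[N];
       for (i = 0; i < N; i++)
	 a[i] = (b[i] << 2) >> 1;

   is expressed in GIMPLE with b[i] promoted to int, shifted, and then
   demoted back to unsigned char.  This pattern detects that the whole
   sequence can be performed in a narrower type, e.g. short, allowing
   more elements per vector.  */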

static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 case we associate the last pattern statement with the last statement
	 in the sequence.  Therefore, we only add the original statement to
	 the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                           "created pattern stmt: ");
	  dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type, NULL);
	  pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       var, NULL_TREE);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}

/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
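
/* An illustrative sketch (not from the original sources): with
   'type' = short and 'TYPE' = int, the basic form of this pattern is
   matched by

       short a[N];
       int res[N];
       for (i = 0; i < N; i++)
	 res[i] = (int) a[i] << 3;

   which can then be vectorized using WIDEN_LSHIFT_EXPR on the short
   inputs instead of promoting them to int first.  */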

static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

   type a_t;
   TYPE b_T, res_T;

   S1 a_t = ;
   S2 b_T = ;
   S3 res_T = b_T op a_t;

  where type 'TYPE' is a type with different size than 'type',
  and op is <<, >> or rotate.

  Also detect cases:

  type a_t;
  TYPE b_T, c_T, res_T;

  S0 c_T = ;
  S1 a_t = (type) c_T;
  S2 b_T = ;
  S3 res_T = b_T op a_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
    with a shift/rotate which has same type on both operands, in the
    second case just b_T op c_T, in the first case with added cast
    from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the shift/rotate
    S3 stmt.  */
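
/* An illustrative sketch (not from the original sources): this
   pattern matches loops such as

       int b[N], res[N];
       char a[N];
       for (i = 0; i < N; i++)
	 res[i] = b[i] << a[i];

   where the shift amount has a narrower type than the shifted value;
   the recognizer inserts a cast so that both operands of the vector
   shift have the same type.  */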

static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

/* Detect a signed division by a constant that wouldn't be
   otherwise vectorized:

   type a_t, b_t;

   S1 a_t = b_t / N;

  where type 'type' is an integral type and N is a constant.

  Similarly handle modulo by a constant:

   S4 a_t = b_t % N;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the division stmt.  If N is a power of two constant and
    type is signed, S1 is replaced by:
    S3  y_t = b_t < 0 ? N - 1 : 0;
    S2  x_t = b_t + y_t;
    S1' a_t = x_t >> log2 (N);

    If N is a power of two constant and type is signed, S4 is
    replaced by (where *_T temporaries have unsigned type):
    S9  y_T = b_t < 0 ? -1U : 0U;
    S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    S7  z_t = (type) z_T;
    S6  w_t = b_t + z_t;
    S5  x_t = w_t & (N - 1);
    S4' a_t = x_t - z_t;

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the division
    S1 or modulo S4 stmt.  */
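
/* A worked example (an illustrative sketch, not from the original
   sources): for the signed power-of-two case with N = 4,

       a = b / 4;

   becomes

       y = b < 0 ? 3 : 0;
       x = b + y;
       a = x >> 2;

   The bias y makes the arithmetic right shift round towards zero for
   negative b, matching C's truncating division; e.g. b = -5 gives
   x = -2, and -2 >> 2 == -1 == -5 / 4.  */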

static gimple
vect_recog_divmod_pattern (vec<gimple> *stmts,
			   tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  optab optab;
  tree q;
  int dummy_int, prec;
  stmt_vec_info def_stmt_vinfo;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
    return NULL;

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
  if (optab != unknown_optab)
    {
      enum machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing)
	return NULL;
    }

  prec = TYPE_PRECISION (itype);
  if (integer_pow2p (oprnd1))
    {
      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
	return NULL;

      /* Pattern detected.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                         "vect_recog_divmod_pattern: detected: ");

      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
		     build_int_cst (itype, 0));
      if (rhs_code == TRUNC_DIV_EXPR)
	{
	  tree var = vect_recog_temp_ssa_var (itype, NULL);
	  tree shift;
	  def_stmt
	    = gimple_build_assign_with_ops (COND_EXPR, var, cond,
					    fold_build2 (MINUS_EXPR, itype,
							 oprnd1,
							 build_int_cst (itype,
									1)),
					    build_int_cst (itype, 0));
	  new_pattern_def_seq (stmt_vinfo, def_stmt);
	  var = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
					    gimple_assign_lhs (def_stmt));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  shift = build_int_cst (itype, tree_log2 (oprnd1));
	  pattern_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    var, shift);
	}
      else
	{
	  tree signmask;
	  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
	  if (compare_tree_int (oprnd1, 2) == 0)
	    {
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
						build_int_cst (itype, 1),
						build_int_cst (itype, 0));
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  else
	    {
	      tree utype
		= build_nonstandard_integer_type (prec, 1);
	      tree vecutype = get_vectype_for_scalar_type (utype);
	      tree shift
		= build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
					- tree_log2 (oprnd1));
	      tree var = vect_recog_temp_ssa_var (utype, NULL);

	      def_stmt
		= gimple_build_assign_with_ops (COND_EXPR, var, cond,
						build_int_cst (utype, -1),
						build_int_cst (utype, 0));
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      var = vect_recog_temp_ssa_var (utype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, var,
						gimple_assign_lhs (def_stmt),
						shift);
	      def_stmt_vinfo
		= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	      signmask = vect_recog_temp_ssa_var (itype, NULL);
	      def_stmt
		= gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
						NULL_TREE);
	      append_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    oprnd0, signmask);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	  def_stmt
	    = gimple_build_assign_with_ops (BIT_AND_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    gimple_assign_lhs (def_stmt),
					    fold_build2 (MINUS_EXPR, itype,
							 oprnd1,
							 build_int_cst (itype,
									1)));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  pattern_stmt
	    = gimple_build_assign_with_ops (MINUS_EXPR,
					    vect_recog_temp_ssa_var (itype,
								     NULL),
					    gimple_assign_lhs (def_stmt),
					    signmask);
	}

      if (dump_enabled_p ())
	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
                              0);

      stmts->safe_push (last_stmt);

      *type_in = vectype;
      *type_out = vectype;
      return pattern_stmt;
    }

  if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
      || integer_zerop (oprnd1)
      || prec > HOST_BITS_PER_WIDE_INT)
    return NULL;

  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
    return NULL;

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  if (TYPE_UNSIGNED (itype))
    {
      unsigned HOST_WIDE_INT mh, ml;
      int pre_shift, post_shift;
      unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
				 & GET_MODE_MASK (TYPE_MODE (itype));
      tree t1, t2, t3, t4;

      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
	return NULL;

      /* Find a suitable multiplier and right shift count
	 instead of multiplying with D.  */
      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

      /* If the suggested multiplier is more than SIZE bits, we can do better
	 for even divisors, using an initial right shift.  */
      if (mh != 0 && (d & 1) == 0)
	{
	  pre_shift = floor_log2 (d & -d);
	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
				  &ml, &post_shift, &dummy_int);
	  gcc_assert (!mh);
	}
      else
	pre_shift = 0;

      if (mh != 0)
	{
	  if (post_shift - 1 >= prec)
	    return NULL;

	  /* t1 = oprnd0 h* ml;
	     t2 = oprnd0 - t1;
	     t3 = t2 >> 1;
	     t4 = t1 + t3;
	     q = t4 >> (post_shift - 1);  */
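	  /* As a worked example (illustrative, not from the original
	     sources): for unsigned 32-bit division by 7,
	     choose_multiplier yields ml = 0x24924925 with
	     post_shift = 3, so

	         q = (t1 + ((n - t1) >> 1)) >> 2,  t1 = (n * ml) >> 32.

	     E.g. n = 49: t1 = 7, t2 = 42, t3 = 21, t4 = 28,
	     q = 28 >> 2 = 7.  */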
	  t1 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
					    build_int_cst (itype, ml));
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t2 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t3 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
					    integer_one_node);
	  append_pattern_def_seq (stmt_vinfo, def_stmt);

	  t4 = vect_recog_temp_ssa_var (itype, NULL);
	  def_stmt
	    = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);

	  if (post_shift != 1)
	    {
	      append_pattern_def_seq (stmt_vinfo, def_stmt);

	      q = vect_recog_temp_ssa_var (itype, NULL);
	      pattern_stmt
		= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
						build_int_cst (itype,
							       post_shift
							       - 1));
	    }
	  else
	    {
	      q = t4;
	      pattern_stmt = def_stmt;
	    }
	}
      else
	{
	  if (pre_shift >= prec || post_shift >= prec)
	    return NULL;

	  /* t1 = oprnd0 >> pre_shift;
	     t2 = t1 h* ml;
	     q = t2 >> post_shift;  */
1897 if (pre_shift)
1898 {
1899 t1 = vect_recog_temp_ssa_var (itype, NULL);
1900 def_stmt
1901 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
1902 build_int_cst (NULL,
1903 pre_shift));
1904 append_pattern_def_seq (stmt_vinfo, def_stmt);
1905 }
1906 else
1907 t1 = oprnd0;
1909 t2 = vect_recog_temp_ssa_var (itype, NULL);
1910 def_stmt
1911 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
1912 build_int_cst (itype, ml));
1914 if (post_shift)
1915 {
1916 append_pattern_def_seq (stmt_vinfo, def_stmt);
1918 q = vect_recog_temp_ssa_var (itype, NULL);
1919 def_stmt
1920 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
1921 build_int_cst (itype,
1922 post_shift));
1923 }
1924 else
1925 q = t2;
1927 pattern_stmt = def_stmt;
1928 }
1929 }
1930 else
1931 {
1932 unsigned HOST_WIDE_INT ml;
1933 int post_shift;
1934 HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
1935 unsigned HOST_WIDE_INT abs_d;
1936 bool add = false;
1937 tree t1, t2, t3, t4;
1939 /* Give up for -1. */
1940 if (d == -1)
1941 return NULL;
1943 /* Since d might be INT_MIN, we have to cast to
1944 unsigned HOST_WIDE_INT before negating to avoid
1945 undefined signed overflow. */
1946 abs_d = (d >= 0
1947 ? (unsigned HOST_WIDE_INT) d
1948 : - (unsigned HOST_WIDE_INT) d);
1950 /* n rem d = n rem -d */
1951 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
1952 {
1953 d = abs_d;
1954 oprnd1 = build_int_cst (itype, abs_d);
1955 }
1956 else if (HOST_BITS_PER_WIDE_INT >= prec
1957 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
1958 /* This case is not handled correctly below. */
1959 return NULL;
1961 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
1962 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
1963 {
1964 add = true;
1965 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
1966 }
1967 if (post_shift >= prec)
1968 return NULL;
1970 /* t1 = oprnd1 h* ml; */
1971 t1 = vect_recog_temp_ssa_var (itype, NULL);
1972 def_stmt
1973 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
1974 build_int_cst (itype, ml));
1975 append_pattern_def_seq (stmt_vinfo, def_stmt);
1977 if (add)
1978 {
1979 /* t2 = t1 + oprnd0; */
1980 t2 = vect_recog_temp_ssa_var (itype, NULL);
1981 def_stmt
1982 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
1983 append_pattern_def_seq (stmt_vinfo, def_stmt);
1984 }
1985 else
1986 t2 = t1;
1988 if (post_shift)
1989 {
1990 /* t3 = t2 >> post_shift; */
1991 t3 = vect_recog_temp_ssa_var (itype, NULL);
1992 def_stmt
1993 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
1994 build_int_cst (itype, post_shift));
1995 append_pattern_def_seq (stmt_vinfo, def_stmt);
1996 }
1997 else
1998 t3 = t2;
2000 /* t4 = oprnd0 >> (prec - 1); */
2001 t4 = vect_recog_temp_ssa_var (itype, NULL);
2002 def_stmt
2003 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2004 build_int_cst (itype, prec - 1));
2005 append_pattern_def_seq (stmt_vinfo, def_stmt);
2007 /* q = t3 - t4; or q = t4 - t3; */
2008 q = vect_recog_temp_ssa_var (itype, NULL);
2009 pattern_stmt
2010 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2011 d < 0 ? t3 : t4);
2012 }
2014 if (rhs_code == TRUNC_MOD_EXPR)
2015 {
2016 tree r, t1;
2018 /* We divided. Now finish by:
2019 t1 = q * oprnd1;
2020 r = oprnd0 - t1; */
2021 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2023 t1 = vect_recog_temp_ssa_var (itype, NULL);
2024 def_stmt
2025 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2026 append_pattern_def_seq (stmt_vinfo, def_stmt);
2028 r = vect_recog_temp_ssa_var (itype, NULL);
2029 pattern_stmt
2030 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2031 }
2033 /* Pattern detected. */
2034 if (dump_enabled_p ())
2035 {
2036 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2037 "vect_recog_divmod_pattern: detected: ");
2038 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2039 }
2041 stmts->safe_push (last_stmt);
2043 *type_in = vectype;
2044 *type_out = vectype;
2045 return pattern_stmt;
2046 }
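/* Editorial sketch (not part of the original file): a standalone,
   hypothetical C rendition of the scalar arithmetic the branches above
   emit, for 32-bit operands.  The constant 0x24924925 and post_shift == 3
   are the values choose_multiplier is expected to return for d == 7
   (mh != 0 and d odd, so the t1..t4 sequence applies); the mod-by-power-
   of-2 helper assumes an arithmetic right shift on signed values, as the
   pattern's shift sequence does.  */

#include <stdint.h>
#include <assert.h>

/* q = n / 7: t1 = n h* ml; t2 = n - t1; t3 = t2 >> 1; t4 = t1 + t3;
   q = t4 >> (post_shift - 1).  */
static uint32_t
udiv7_sketch (uint32_t n)
{
  uint32_t t1 = (uint32_t) (((uint64_t) n * 0x24924925u) >> 32);
  uint32_t t2 = n - t1;
  uint32_t t3 = t2 >> 1;
  uint32_t t4 = t1 + t3;
  return t4 >> 2;
}

/* r = n % 8 for signed n: signmask is d - 1 for negative n and 0
   otherwise, then r = ((n + signmask) & (d - 1)) - signmask, matching
   the PLUS/BIT_AND/MINUS sequence built above.  */
static int32_t
smod8_sketch (int32_t n)
{
  int32_t signmask = (int32_t) ((uint32_t) (n >> 31) >> (32 - 3));
  return ((n + signmask) & 7) - signmask;
}

void
divmod_sketch_check (void)
{
  assert (udiv7_sketch (6) == 0 && udiv7_sketch (7) == 1);
  assert (udiv7_sketch (0xffffffffu) == 0xffffffffu / 7);
  assert (smod8_sketch (-9) == -1 && smod8_sketch (9) == 1);
}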
2048 /* Function vect_recog_mixed_size_cond_pattern
2050 Try to find the following pattern:
2052 type x_t, y_t;
2053 TYPE a_T, b_T, c_T;
2054 loop:
2055 S1 a_T = x_t CMP y_t ? b_T : c_T;
2057 where type 'TYPE' is an integral type which has different size
2058 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2059 than 'type', the constants need to fit into an integer type
2060 with the same width as 'type') or results of conversion from 'type'.
2062 Input:
2064 * LAST_STMT: A stmt from which the pattern search begins.
2066 Output:
2068 * TYPE_IN: The type of the input arguments to the pattern.
2070 * TYPE_OUT: The type of the output of this pattern.
2072 * Return value: A new stmt that will be used to replace the pattern.
2073 Additionally a def_stmt is added.
2075 a_it = x_t CMP y_t ? b_it : c_it;
2076 a_T = (TYPE) a_it; */
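/* Editorial sketch (hypothetical, not from the original file): a source
   loop of the shape this recognizer targets.  The comparison is done on
   int operands while the stored values are shorts, so the COND_EXPR is
   first evaluated in an int-sized intermediate type and the result then
   narrowed, per the a_it/a_T rewrite above.  */
void
mixed_size_cond_sketch (short *a, const int *x, const int *y, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = x[i] < y[i] ? 3 : 5;  /* S1: a_T = x_t CMP y_t ? b_T : c_T */
}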
2078 static gimple
2079 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2080 tree *type_out)
2081 {
2082 gimple last_stmt = (*stmts)[0];
2083 tree cond_expr, then_clause, else_clause;
2084 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2085 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2086 enum machine_mode cmpmode;
2087 gimple pattern_stmt, def_stmt;
2088 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2089 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2090 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2091 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2092 bool promotion;
2093 tree comp_scalar_type;
2095 if (!is_gimple_assign (last_stmt)
2096 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2097 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2098 return NULL;
2100 cond_expr = gimple_assign_rhs1 (last_stmt);
2101 then_clause = gimple_assign_rhs2 (last_stmt);
2102 else_clause = gimple_assign_rhs3 (last_stmt);
2104 if (!COMPARISON_CLASS_P (cond_expr))
2105 return NULL;
2107 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2108 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2109 if (comp_vectype == NULL_TREE)
2110 return NULL;
2112 type = gimple_expr_type (last_stmt);
2113 if (types_compatible_p (type, comp_scalar_type)
2114 || ((TREE_CODE (then_clause) != INTEGER_CST
2115 || TREE_CODE (else_clause) != INTEGER_CST)
2116 && !INTEGRAL_TYPE_P (comp_scalar_type))
2117 || !INTEGRAL_TYPE_P (type))
2118 return NULL;
2120 if ((TREE_CODE (then_clause) != INTEGER_CST
2121 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2122 &def_stmt0, &promotion))
2123 || (TREE_CODE (else_clause) != INTEGER_CST
2124 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2125 &def_stmt1, &promotion)))
2126 return NULL;
2128 if (orig_type0 && orig_type1
2129 && !types_compatible_p (orig_type0, orig_type1))
2130 return NULL;
2132 if (orig_type0)
2133 {
2134 if (!types_compatible_p (orig_type0, comp_scalar_type))
2135 return NULL;
2136 then_clause = gimple_assign_rhs1 (def_stmt0);
2137 itype = orig_type0;
2138 }
2140 if (orig_type1)
2141 {
2142 if (!types_compatible_p (orig_type1, comp_scalar_type))
2143 return NULL;
2144 else_clause = gimple_assign_rhs1 (def_stmt1);
2145 itype = orig_type1;
2146 }
2148 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2150 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2151 return NULL;
2153 vectype = get_vectype_for_scalar_type (type);
2154 if (vectype == NULL_TREE)
2155 return NULL;
2157 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2158 return NULL;
2160 if (itype == NULL_TREE)
2161 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2162 TYPE_UNSIGNED (type));
2164 if (itype == NULL_TREE
2165 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2166 return NULL;
2168 vecitype = get_vectype_for_scalar_type (itype);
2169 if (vecitype == NULL_TREE)
2170 return NULL;
2172 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2173 return NULL;
2175 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2176 {
2177 if ((TREE_CODE (then_clause) == INTEGER_CST
2178 && !int_fits_type_p (then_clause, itype))
2179 || (TREE_CODE (else_clause) == INTEGER_CST
2180 && !int_fits_type_p (else_clause, itype)))
2181 return NULL;
2182 }
2184 def_stmt
2185 = gimple_build_assign_with_ops (COND_EXPR,
2186 vect_recog_temp_ssa_var (itype, NULL),
2187 unshare_expr (cond_expr),
2188 fold_convert (itype, then_clause),
2189 fold_convert (itype, else_clause));
2190 pattern_stmt
2191 = gimple_build_assign_with_ops (NOP_EXPR,
2192 vect_recog_temp_ssa_var (type, NULL),
2193 gimple_assign_lhs (def_stmt), NULL_TREE);
2195 new_pattern_def_seq (stmt_vinfo, def_stmt);
2196 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2197 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2198 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2199 *type_in = vecitype;
2200 *type_out = vectype;
2202 if (dump_enabled_p ())
2203 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2204 "vect_recog_mixed_size_cond_pattern: detected: ");
2206 return pattern_stmt;
2207 }
2210 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2211 true if bool VAR can be optimized that way. */
2213 static bool
2214 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2215 {
2216 gimple def_stmt;
2217 enum vect_def_type dt;
2218 tree def, rhs1;
2219 enum tree_code rhs_code;
2221 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2222 &dt))
2223 return false;
2225 if (dt != vect_internal_def)
2226 return false;
2228 if (!is_gimple_assign (def_stmt))
2229 return false;
2231 if (!has_single_use (def))
2232 return false;
2234 rhs1 = gimple_assign_rhs1 (def_stmt);
2235 rhs_code = gimple_assign_rhs_code (def_stmt);
2236 switch (rhs_code)
2237 {
2238 case SSA_NAME:
2239 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2241 CASE_CONVERT:
2242 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2243 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2244 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2245 return false;
2246 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2248 case BIT_NOT_EXPR:
2249 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2251 case BIT_AND_EXPR:
2252 case BIT_IOR_EXPR:
2253 case BIT_XOR_EXPR:
2254 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2255 return false;
2256 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2257 bb_vinfo);
2259 default:
2260 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2261 {
2262 tree vecitype, comp_vectype;
2264 /* If the comparison can throw, then is_gimple_condexpr will be
2265 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2266 if (stmt_could_throw_p (def_stmt))
2267 return false;
2269 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2270 if (comp_vectype == NULL_TREE)
2271 return false;
2273 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2274 {
2275 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2276 tree itype
2277 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2278 vecitype = get_vectype_for_scalar_type (itype);
2279 if (vecitype == NULL_TREE)
2280 return false;
2281 }
2282 else
2283 vecitype = comp_vectype;
2284 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2285 }
2286 return false;
2287 }
2288 }
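/* Editorial sketch (hypothetical): a bool def chain this helper accepts.
   Every leaf is a comparison for which a suitable VEC_COND_EXPR exists,
   leaves are combined only through conversions, BIT_NOT_EXPR and
   BIT_{AND,IOR,XOR}_EXPR, and every intermediate def has a single use.  */
_Bool
bool_chain_sketch (int x, int y, int p, int q)
{
  _Bool a = x < y;   /* tcc_comparison leaf */
  _Bool b = p != q;  /* tcc_comparison leaf */
  _Bool c = !a;      /* BIT_NOT_EXPR */
  return c & b;      /* BIT_AND_EXPR */
}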
2291 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2292 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2293 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2295 static tree
2296 adjust_bool_pattern_cast (tree type, tree var)
2297 {
2298 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2299 gimple cast_stmt, pattern_stmt;
2301 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2302 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2303 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2304 cast_stmt
2305 = gimple_build_assign_with_ops (NOP_EXPR,
2306 vect_recog_temp_ssa_var (type, NULL),
2307 gimple_assign_lhs (pattern_stmt),
2308 NULL_TREE);
2309 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2310 return gimple_assign_lhs (cast_stmt);
2311 }
2314 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2315 recursively. VAR is an SSA_NAME that should be transformed from bool
2316 to a wider integer type; OUT_TYPE is the desired final integer type of
2317 the whole pattern. TRUEVAL should be NULL unless optimizing
2318 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2319 in the then_clause. STMTS is where statements with added pattern stmts
2320 should be pushed to. */
2322 static tree
2323 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2324 vec<gimple> *stmts)
2325 {
2326 gimple stmt = SSA_NAME_DEF_STMT (var);
2327 enum tree_code rhs_code, def_rhs_code;
2328 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2329 location_t loc;
2330 gimple pattern_stmt, def_stmt;
2332 rhs1 = gimple_assign_rhs1 (stmt);
2333 rhs2 = gimple_assign_rhs2 (stmt);
2334 rhs_code = gimple_assign_rhs_code (stmt);
2335 loc = gimple_location (stmt);
2336 switch (rhs_code)
2337 {
2338 case SSA_NAME:
2339 CASE_CONVERT:
2340 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2341 itype = TREE_TYPE (irhs1);
2342 pattern_stmt
2343 = gimple_build_assign_with_ops (SSA_NAME,
2344 vect_recog_temp_ssa_var (itype, NULL),
2345 irhs1, NULL_TREE);
2346 break;
2348 case BIT_NOT_EXPR:
2349 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2350 itype = TREE_TYPE (irhs1);
2351 pattern_stmt
2352 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2353 vect_recog_temp_ssa_var (itype, NULL),
2354 irhs1, build_int_cst (itype, 1));
2355 break;
2357 case BIT_AND_EXPR:
2358 /* Try to optimize x = y & (a < b ? 1 : 0); into
2359 x = (a < b ? y : 0);
2361 E.g. for:
2362 bool a_b, b_b, c_b;
2363 TYPE d_T;
2365 S1 a_b = x1 CMP1 y1;
2366 S2 b_b = x2 CMP2 y2;
2367 S3 c_b = a_b & b_b;
2368 S4 d_T = (TYPE) c_b;
2370 we would normally emit:
2372 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2373 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2374 S3' c_T = a_T & b_T;
2375 S4' d_T = c_T;
2377 but we can save one stmt by using the
2378 result of one of the COND_EXPRs in the other COND_EXPR and leave
2379 BIT_AND_EXPR stmt out:
2381 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2382 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2383 S4' d_T = c_T;
2385 At least when VEC_COND_EXPR is implemented using masks,
2386 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2387 computes the comparison mask and ANDs it, in one case with an
2388 all-ones vector, in the other case with a vector register.
2389 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2390 often more expensive. */
2391 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2392 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2393 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2394 {
2395 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2396 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2397 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2398 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2399 {
2400 gimple tstmt;
2401 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2402 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2403 tstmt = stmts->pop ();
2404 gcc_assert (tstmt == def_stmt);
2405 stmts->quick_push (stmt);
2406 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2407 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2408 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2409 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2410 return irhs2;
2411 }
2412 else
2413 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2414 goto and_ior_xor;
2415 }
2416 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2417 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2418 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2419 {
2420 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2421 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2422 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2423 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2424 {
2425 gimple tstmt;
2426 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2427 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2428 tstmt = stmts->pop ();
2429 gcc_assert (tstmt == def_stmt);
2430 stmts->quick_push (stmt);
2431 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2432 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2433 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2434 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2435 return irhs1;
2436 }
2437 else
2438 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2439 goto and_ior_xor;
2440 }
2441 /* FALLTHRU */
2442 case BIT_IOR_EXPR:
2443 case BIT_XOR_EXPR:
2444 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2445 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2446 and_ior_xor:
2447 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2448 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2449 {
2450 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2451 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2452 int out_prec = TYPE_PRECISION (out_type);
2453 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2454 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2455 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2456 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2457 else
2458 {
2459 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2460 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2461 }
2462 }
2463 itype = TREE_TYPE (irhs1);
2464 pattern_stmt
2465 = gimple_build_assign_with_ops (rhs_code,
2466 vect_recog_temp_ssa_var (itype, NULL),
2467 irhs1, irhs2);
2468 break;
2470 default:
2471 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2472 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
2473 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2474 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2475 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
2476 {
2477 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2478 itype
2479 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2480 }
2481 else
2482 itype = TREE_TYPE (rhs1);
2483 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2484 if (trueval == NULL_TREE)
2485 trueval = build_int_cst (itype, 1);
2486 else
2487 gcc_checking_assert (useless_type_conversion_p (itype,
2488 TREE_TYPE (trueval)));
2489 pattern_stmt
2490 = gimple_build_assign_with_ops (COND_EXPR,
2491 vect_recog_temp_ssa_var (itype, NULL),
2492 cond_expr, trueval,
2493 build_int_cst (itype, 0));
2494 break;
2495 }
2497 stmts->safe_push (stmt);
2498 gimple_set_location (pattern_stmt, loc);
2499 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2500 return gimple_assign_lhs (pattern_stmt);
2501 }
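/* Editorial sketch (hypothetical): the BIT_AND_EXPR shortcut implemented
   in adjust_bool_pattern above.  For a one-bit Y, Y & (A < B ? 1 : 0)
   equals A < B ? Y : 0, so the result of one COND_EXPR can feed the
   then_clause of the other and the separate AND statement disappears.  */
#include <assert.h>

void
bit_and_cond_sketch (int a, int b, unsigned y /* 0 or 1 */)
{
  unsigned with_and = y & (a < b ? 1u : 0u);
  unsigned with_cond = a < b ? y : 0u;
  assert (with_and == with_cond);
}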
2504 /* Function vect_recog_bool_pattern
2506 Try to find a pattern like the following:
2508 bool a_b, b_b, c_b, d_b, e_b;
2509 TYPE f_T;
2510 loop:
2511 S1 a_b = x1 CMP1 y1;
2512 S2 b_b = x2 CMP2 y2;
2513 S3 c_b = a_b & b_b;
2514 S4 d_b = x3 CMP3 y3;
2515 S5 e_b = c_b | d_b;
2516 S6 f_T = (TYPE) e_b;
2518 where type 'TYPE' is an integral type.
2520 Input:
2522 * LAST_STMT: A stmt at the end from which the pattern
2523 search begins, i.e. a cast of a bool to
2524 an integer type.
2526 Output:
2528 * TYPE_IN: The type of the input arguments to the pattern.
2530 * TYPE_OUT: The type of the output of this pattern.
2532 * Return value: A new stmt that will be used to replace the pattern.
2534 Assuming the size of TYPE is the same as the size of all comparisons
2535 (otherwise some casts would be added where needed), for the above
2536 sequence we create related pattern stmts:
2537 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2538 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2539 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2540 S5' e_T = c_T | d_T;
2541 S6' f_T = e_T;
2543 Instead of the above S3' we could emit:
2544 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2545 S3' c_T = a_T | b_T;
2546 but the above is more efficient. */
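/* Editorial sketch (hypothetical): a source loop matching S1..S6 above,
   with comparisons combined through bool AND/IOR and finally cast to an
   integer type.  */
void
bool_pattern_sketch (int *f, const int *x1, const int *y1,
                     const int *x2, const int *y2,
                     const int *x3, const int *y3, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      _Bool a_b = x1[i] < y1[i];  /* S1 */
      _Bool b_b = x2[i] < y2[i];  /* S2 */
      _Bool c_b = a_b & b_b;      /* S3 */
      _Bool d_b = x3[i] < y3[i];  /* S4 */
      _Bool e_b = c_b | d_b;      /* S5 */
      f[i] = (int) e_b;           /* S6 */
    }
}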
2548 static gimple
2549 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
2550 tree *type_out)
2551 {
2552 gimple last_stmt = stmts->pop ();
2553 enum tree_code rhs_code;
2554 tree var, lhs, rhs, vectype;
2555 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2556 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2557 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2558 gimple pattern_stmt;
2560 if (!is_gimple_assign (last_stmt))
2561 return NULL;
2563 var = gimple_assign_rhs1 (last_stmt);
2564 lhs = gimple_assign_lhs (last_stmt);
2566 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2567 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2568 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2569 return NULL;
2571 rhs_code = gimple_assign_rhs_code (last_stmt);
2572 if (CONVERT_EXPR_CODE_P (rhs_code))
2573 {
2574 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2575 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
2576 return NULL;
2577 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2578 if (vectype == NULL_TREE)
2579 return NULL;
2581 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2582 return NULL;
2584 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2585 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2586 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2587 pattern_stmt
2588 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2589 else
2590 pattern_stmt
2591 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2592 *type_out = vectype;
2593 *type_in = vectype;
2594 stmts->safe_push (last_stmt);
2595 if (dump_enabled_p ())
2596 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2597 "vect_recog_bool_pattern: detected: ");
2599 return pattern_stmt;
2600 }
2601 else if (rhs_code == SSA_NAME
2602 && STMT_VINFO_DATA_REF (stmt_vinfo))
2603 {
2604 stmt_vec_info pattern_stmt_info;
2605 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2606 gcc_assert (vectype != NULL_TREE);
2607 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2608 return NULL;
2609 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
2610 return NULL;
2612 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2613 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2614 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2615 {
2616 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2617 gimple cast_stmt
2618 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
2619 new_pattern_def_seq (stmt_vinfo, cast_stmt);
2620 rhs = rhs2;
2621 }
2622 pattern_stmt
2623 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2624 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2625 bb_vinfo);
2626 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2627 STMT_VINFO_DATA_REF (pattern_stmt_info)
2628 = STMT_VINFO_DATA_REF (stmt_vinfo);
2629 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2630 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2631 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2632 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2633 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2634 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2635 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2636 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
2637 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
2638 *type_out = vectype;
2639 *type_in = vectype;
2640 stmts->safe_push (last_stmt);
2641 if (dump_enabled_p ())
2642 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2643 "vect_recog_bool_pattern: detected: ");
2644 return pattern_stmt;
2645 }
2646 else
2647 return NULL;
2648 }
2651 /* Mark statements that are involved in a pattern. */
2653 static inline void
2654 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2655 tree pattern_vectype)
2656 {
2657 stmt_vec_info pattern_stmt_info, def_stmt_info;
2658 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2659 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
2660 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
2661 gimple def_stmt;
2663 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
2664 if (pattern_stmt_info == NULL)
2665 {
2666 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2667 bb_vinfo);
2668 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2669 }
2670 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
2672 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2673 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
2674 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2675 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2676 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2677 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
2678 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2679 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2680 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
2681 {
2682 gimple_stmt_iterator si;
2683 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2684 !gsi_end_p (si); gsi_next (&si))
2685 {
2686 def_stmt = gsi_stmt (si);
2687 def_stmt_info = vinfo_for_stmt (def_stmt);
2688 if (def_stmt_info == NULL)
2689 {
2690 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2691 bb_vinfo);
2692 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2693 }
2694 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2695 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2696 STMT_VINFO_DEF_TYPE (def_stmt_info)
2697 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2698 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2699 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
2700 }
2701 }
2702 }
2704 /* Function vect_pattern_recog_1
2706 Input:
2707 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2708 computation pattern.
2709 STMT: A stmt from which the pattern search should start.
2711 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
2712 expression that computes the same functionality and can be used to
2713 replace the sequence of stmts that are involved in the pattern.
2715 Output:
2716 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2717 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2718 relevant vector type. If 'TYPE_IN' is already a vector type, then this
2719 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2720 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2721 the available target pattern.
2723 This function also does some bookkeeping, as explained in the documentation
2724 for vect_pattern_recog. */
2726 static void
2727 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2728 gimple_stmt_iterator si,
2729 vec<gimple> *stmts_to_replace)
2730 {
2731 gimple stmt = gsi_stmt (si), pattern_stmt;
2732 stmt_vec_info stmt_info;
2733 loop_vec_info loop_vinfo;
2734 tree pattern_vectype;
2735 tree type_in, type_out;
2736 enum tree_code code;
2737 int i;
2738 gimple next;
2740 stmts_to_replace->truncate (0);
2741 stmts_to_replace->quick_push (stmt);
2742 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
2743 if (!pattern_stmt)
2744 return;
2746 stmt = stmts_to_replace->last ();
2747 stmt_info = vinfo_for_stmt (stmt);
2748 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2750 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2751 {
2752 /* No need to check target support (already checked by the pattern
2753 recognition function). */
2754 pattern_vectype = type_out ? type_out : type_in;
2755 }
2756 else
2757 {
2758 enum machine_mode vec_mode;
2759 enum insn_code icode;
2760 optab optab;
2762 /* Check target support */
2763 type_in = get_vectype_for_scalar_type (type_in);
2764 if (!type_in)
2765 return;
2766 if (type_out)
2767 type_out = get_vectype_for_scalar_type (type_out);
2768 else
2769 type_out = type_in;
2770 if (!type_out)
2771 return;
2772 pattern_vectype = type_out;
2774 if (is_gimple_assign (pattern_stmt))
2775 code = gimple_assign_rhs_code (pattern_stmt);
2776 else
2777 {
2778 gcc_assert (is_gimple_call (pattern_stmt));
2779 code = CALL_EXPR;
2780 }
2782 optab = optab_for_tree_code (code, type_in, optab_default);
2783 vec_mode = TYPE_MODE (type_in);
2784 if (!optab
2785 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
2786 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
2787 return;
2788 }
2790 /* Found a vectorizable pattern. */
2791 if (dump_enabled_p ())
2792 {
2793 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2794 "pattern recognized: ");
2795 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2796 }
2798 /* Mark the stmts that are involved in the pattern. */
2799 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
2801 /* Patterns cannot be vectorized using SLP, because they change the order of
2802 computation. */
2803 if (loop_vinfo)
2804 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2805 if (next == stmt)
2806 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
2808 /* It is possible that additional pattern stmts are created and inserted in
2809 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2810 relevant statements. */
2811 for (i = 0; stmts_to_replace->iterate (i, &stmt)
2812 && (unsigned) i < (stmts_to_replace->length () - 1);
2813 i++)
2814 {
2815 stmt_info = vinfo_for_stmt (stmt);
2816 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2817 if (dump_enabled_p ())
2818 {
2819 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2820 "additional pattern stmt: ");
2821 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2822 }
2824 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
2825 }
2826 }
2829 /* Function vect_pattern_recog
2831 Input:
2832 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
2833 computation idioms.
2835 Output - for each computation idiom that is detected we create a new stmt
2836 that provides the same functionality and that can be vectorized. We
2837 also record some information in the stmt_vec_info of the relevant
2838 stmts, as explained below:
2840 At the entry to this function we have the following stmts, with the
2841 following initial value in the STMT_VINFO fields:
2843 stmt in_pattern_p related_stmt vec_stmt
2844 S1: a_i = .... - - -
2845 S2: a_2 = ..use(a_i).. - - -
2846 S3: a_1 = ..use(a_2).. - - -
2847 S4: a_0 = ..use(a_1).. - - -
2848 S5: ... = ..use(a_0).. - - -
2850 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
2851 represented by a single stmt. We then:
2852 - create a new stmt S6 equivalent to the pattern (the stmt is not
2853 inserted into the code)
2854 - fill in the STMT_VINFO fields as follows:
2856 in_pattern_p related_stmt vec_stmt
2857 S1: a_i = .... - - -
2858 S2: a_2 = ..use(a_i).. - - -
2859 S3: a_1 = ..use(a_2).. - - -
2860 S4: a_0 = ..use(a_1).. true S6 -
2861 '---> S6: a_new = .... - S4 -
2862 S5: ... = ..use(a_0).. - - -
2864 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
2865 to each other through the RELATED_STMT field).
2867 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2868 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2869 remain irrelevant unless used by stmts other than S4.
2871 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
2872 (because they are marked as irrelevant). It will vectorize S6, and record
2873 a pointer to the new vector stmt VS6 from S6 (as usual).
2874 S4 will be skipped, and S5 will be vectorized as usual:
2876 in_pattern_p related_stmt vec_stmt
2877 S1: a_i = .... - - -
2878 S2: a_2 = ..use(a_i).. - - -
2879 S3: a_1 = ..use(a_2).. - - -
2880 > VS6: va_new = .... - - -
2881 S4: a_0 = ..use(a_1).. true S6 VS6
2882 '---> S6: a_new = .... - S4 VS6
2883 > VS5: ... = ..vuse(va_new).. - - -
2884 S5: ... = ..use(a_0).. - - -
2886 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
2887 elsewhere), and we'll end up with:
2889 VS6: va_new = ....
2890 VS5: ... = ..vuse(va_new)..
2892 In case of more than one pattern statements, e.g., widen-mult with
2893 intermediate type:
2895 S1 a_t = ;
2896 S2 a_T = (TYPE) a_t;
2897 '--> S3: a_it = (interm_type) a_t;
2898 S4 prod_T = a_T * CONST;
2899 '--> S5: prod_T' = a_it w* CONST;
2901 there may be other users of a_T outside the pattern. In that case S2 will
2902 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2903 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2904 be recorded in S3. */
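/* Editorial sketch (hypothetical): a source loop for the widen-mult case
   with an intermediate type described above; the short operand is widened
   and multiplied by a constant, and the pattern introduces the
   intermediate-type statements S3/S5.  */
void
widen_mult_sketch (int *prod, const short *a, int n)
{
  int i;
  for (i = 0; i < n; i++)
    prod[i] = (int) a[i] * 37;  /* S2 + S4: a_T = (TYPE) a_t; prod_T = a_T * CONST */
}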
2906 void
2907 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2908 {
2909 struct loop *loop;
2910 basic_block *bbs;
2911 unsigned int nbbs;
2912 gimple_stmt_iterator si;
2913 unsigned int i, j;
2914 vect_recog_func_ptr vect_recog_func;
2915 vec<gimple> stmts_to_replace;
2916 stmts_to_replace.create (1);
2917 gimple stmt;
2919 if (dump_enabled_p ())
2920 dump_printf_loc (MSG_NOTE, vect_location,
2921 "=== vect_pattern_recog ===");
2923 if (loop_vinfo)
2924 {
2925 loop = LOOP_VINFO_LOOP (loop_vinfo);
2926 bbs = LOOP_VINFO_BBS (loop_vinfo);
2927 nbbs = loop->num_nodes;
2928 }
2929 else
2930 {
2931 bbs = &BB_VINFO_BB (bb_vinfo);
2932 nbbs = 1;
2933 }
2935 /* Scan through the loop stmts, applying the pattern recognition
2936 functions starting at each stmt visited: */
2937 for (i = 0; i < nbbs; i++)
2938 {
2939 basic_block bb = bbs[i];
2940 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2941 {
2942 if (bb_vinfo && (stmt = gsi_stmt (si))
2943 && vinfo_for_stmt (stmt)
2944 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
2945 continue;
2947 /* Scan over all generic vect_recog_xxx_pattern functions. */
2948 for (j = 0; j < NUM_PATTERNS; j++)
2949 {
2950 vect_recog_func = vect_vect_recog_func_ptrs[j];
2951 vect_pattern_recog_1 (vect_recog_func, si,
2952 &stmts_to_replace);
2953 }
2954 }
2955 }
2957 stmts_to_replace.release ();
2958 }