/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "stor-layout.h"
27 #include "target.h"
28 #include "predict.h"
29 #include "vec.h"
30 #include "hashtab.h"
31 #include "hash-set.h"
32 #include "machmode.h"
33 #include "hard-reg-set.h"
34 #include "input.h"
35 #include "function.h"
36 #include "dominance.h"
37 #include "basic-block.h"
38 #include "gimple-pretty-print.h"
39 #include "tree-ssa-alias.h"
40 #include "internal-fn.h"
41 #include "tree-eh.h"
42 #include "gimple-expr.h"
43 #include "is-a.h"
44 #include "gimple.h"
45 #include "gimplify.h"
46 #include "gimple-iterator.h"
47 #include "gimple-ssa.h"
48 #include "tree-phinodes.h"
49 #include "ssa-iterators.h"
50 #include "stringpool.h"
51 #include "tree-ssanames.h"
52 #include "cfgloop.h"
53 #include "expr.h"
54 #include "optabs.h"
55 #include "params.h"
56 #include "tree-data-ref.h"
57 #include "tree-vectorizer.h"
58 #include "recog.h" /* FIXME: for insn_data */
59 #include "diagnostic-core.h"
60 #include "dumpfile.h"
61 #include "builtins.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
				      tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
					      tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
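
/* For illustration, a minimal sketch of a matching promotion, assuming a
   16-bit short and a 32-bit int (hypothetical user code, not from this
   file):

     short s;
     int i = (int) s;     <-- DEF_STMT: i = NOP (s)

   For the use of 'i', this function returns true with *ORIG_TYPE == short
   and *PROMOTION == true, since int is at least twice as wide as short.  */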
static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
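
/* Illustrative source-level example (an editor's sketch, not part of the
   original file): a loop that typically instantiates the pattern above,
   assuming 16-bit shorts accumulated into a 32-bit int:

     short a[256], b[256];

     int
     dot_prod (void)
     {
       int i, sum = 0;
       for (i = 0; i < 256; i++)
	 sum += a[i] * b[i];
       return sum;
     }

   Each a[i] and b[i] is promoted to int (S3/S4), multiplied (S5) and
   accumulated (S7); the whole chain becomes one DOT_PROD_EXPR.  */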
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
	return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
	return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
	{
	  stmt = def_stmt;
	  oprnd0 = gimple_assign_rhs1 (stmt);
	}
      else
	half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
	return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
	return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
	return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
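
/* Illustrative source-level example (an editor's sketch, not part of the
   original file): a loop that typically instantiates the SAD pattern,
   assuming unsigned chars widened to int:

     unsigned char a[16], b[16];

     int
     sad (void)
     {
       int i, sum = 0;
       for (i = 0; i < 16; i++)
	 {
	   int diff = a[i] - b[i];
	   sum += diff >= 0 ? diff : -diff;
	 }
       return sum;
     }

   The conditional is typically folded to ABS_EXPR (S6) earlier in the
   pipeline; the subtract/abs/accumulate chain then becomes one SAD_EXPR.  */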
static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
	return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
			     &half_type, &def_stmt, &promotion))
	plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
	half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
			  &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
			  &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign_with_ops
			  (SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
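
/* Illustrative example (an editor's sketch, not part of the original file):
   with 8-bit chars, 16-bit shorts and a 32-bit int,

     char a_t;
     int  a_T = (int) a_t;
     int  prod = a_T * 200;

   the constant 200 does not fit char, but int is 4 times bigger than char
   and 200 does fit the intermediate 16-bit type, so the promotion is
   replaced by a_it = (short) a_t and the multiplication is widened from
   short to int.  */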
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
	  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
	return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
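
/* Illustrative source-level example (an editor's sketch, not part of the
   original file): a loop that typically instantiates the widen-mult
   pattern, assuming 16-bit shorts and a 32-bit int:

     short a[256], b[256];
     int c[256];

     void
     widen_mult (void)
     {
       int i;
       for (i = 0; i < 256; i++)
	 c[i] = a[i] * b[i];
     }

   Both operands are promoted to int (S3/S4) before the multiply (S5);
   the three statements become one WIDEN_MULT_EXPR per element.  */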
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      tree *oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
					       old_oprnd, NULL_TREE);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt
	= gimple_build_assign_with_ops (NOP_EXPR,
					vect_recog_temp_ssa_var (type, NULL),
					gimple_assign_lhs (pattern_stmt),
					NULL_TREE);
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
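
/* Illustrative example (an editor's sketch, not part of the original file):

     #include <math.h>

     double
     square_plus_root (double y, double z)
     {
       return pow (y, 2.0) + pow (z, 0.5);
     }

   The first call is rewritten as y * y and the second as sqrt (z), both of
   which the vectorizer can handle directly.  */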
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gimple stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
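
/* Illustrative source-level example (an editor's sketch, not part of the
   original file): a reduction that typically instantiates this pattern,
   assuming 16-bit unsigned shorts summed into a 32-bit unsigned int:

     unsigned short a[256];

     unsigned int
     widen_sum (void)
     {
       unsigned int i, sum = 0;
       for (i = 0; i < 256; i++)
	 sum += a[i];
       return sum;
     }

   Each a[i] is promoted to unsigned int (S2) and accumulated (S3); the
   pair becomes one WIDEN_SUM_EXPR.  */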
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
	if (!int_fits_type_p (const_oprnd, half_type))
	  {
	    /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	    if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	      return false;

	    interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
	    if (!int_fits_type_p (const_oprnd, interm_type))
	      return false;
	  }

	break;

      case LSHIFT_EXPR:
	/* Try intermediate type - HALF_TYPE is not enough for sure.  */
	if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	  return false;

	/* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	   (e.g., if the original value was char, the shift amount is at most 8
	   if we want to use short).  */
	if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	  return false;

	interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

	if (!vect_supportable_shift (code, interm_type))
	  return false;

	break;

      case RSHIFT_EXPR:
	if (vect_supportable_shift (code, half_type))
	  break;

	/* Try intermediate type - HALF_TYPE is not supported.  */
	if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	  return false;

	interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

	if (!vect_supportable_shift (code, interm_type))
	  return false;

	break;

      default:
	gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type, NULL);
	      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       oprnd, NULL_TREE);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type, NULL);
	  new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						   oprnd, NULL_TREE);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0 and C1
   are constants.  Check if S3 and S4 can be done on a smaller type than
   'TYPE', which can either be 'type' or some intermediate type.  For now, we
   expect S5 to be a type demotion operation.  We also check that S3 and S4
   have only one use.  */
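
/* Illustrative source-level example (an editor's sketch, not part of the
   original file): shifts done in over-wide arithmetic whose result is
   demoted back to the narrow type:

     unsigned char in[256], out[256];

     void
     over_widen (void)
     {
       int i;
       for (i = 0; i < 256; i++)
	 out[i] = (in[i] << 2) >> 1;
     }

   The shifts are performed in int only because of C integer promotion;
   since the result is truncated back to unsigned char, this pattern
   rewrites them to operate on a narrower type.  */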
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 case we associate the last pattern statement with the last statement
	 in the sequence.  Therefore, we only add the original statement to
	 the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type, NULL);
	  pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       var, NULL_TREE);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

     type a_t;
     TYPE a_T, res_T;

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
     S3 res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect cases where the shift result is immediately converted
   to another type 'result_type' that is no larger in size than 'TYPE'.
   In those cases we perform a widen-shift that directly results in
   'result_type', to avoid a possible over-widening situation:

     type a_t;
     TYPE a_T, res_T;
     result_type res_result;

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
     S3 res_T = a_T << CONST;
     S4 res_result = (result_type) res_T;
         '--> res_result' = a_t w<< CONST;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

     type a_t;
     interm_type a_it;
     TYPE a_T, res_T, res_T';

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S3 res_T = a_T << CONST;
           '--> res_T' = a_it <<* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.
   In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
   in STMTS.  When an intermediate type is used and a pattern statement is
   created for S2, we also put S2 here (before S3).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
     WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
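
/* Illustrative source-level example (an editor's sketch, not part of the
   original file): a left shift whose operand is widened first, assuming
   8-bit unsigned chars and 16-bit unsigned shorts:

     unsigned char a[256];
     unsigned short r[256];

     void
     widen_shift (void)
     {
       int i;
       for (i = 0; i < 256; i++)
	 r[i] = (unsigned short) a[i] << 4;
     }

   The promotion and the shift are combined into one WIDEN_LSHIFT_EXPR
   <a[i], 4> per element, producing the 16-bit result directly.  */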
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

     type a_t, b_t, c_t;

     S0 a_t = b_t r<< c_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
     with a sequence:

     S1 d_t = -c_t;
     S2 e_t = d_t & (B - 1);
     S3 f_t = b_t << c_t;
     S4 g_t = b_t >> e_t;
     S0 a_t = f_t | g_t;

   where B is the element bitsize of type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the rotate
     S0 stmt.  */
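
/* Illustrative source-level example (an editor's sketch, not part of the
   original file): rotates usually reach the vectorizer as LROTATE_EXPR
   after earlier folding of the familiar shift-and-or idiom, assuming a
   32-bit unsigned int and 0 < n < 32:

     unsigned int a[256], b[256];

     void
     rotate_all (unsigned int n)
     {
       int i;
       for (i = 0; i < 256; i++)
	 b[i] = (a[i] << n) | (a[i] >> (32 - n));
     }

   On a target with vector shifts but no vector rotate, each rotate is
   re-expanded into the S1-S4/S0 sequence shown above.  */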
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
1974 stmt_vec_info def_stmt_vinfo;
1976 if (vecstype == NULL_TREE)
1977 return NULL;
1978 def2 = vect_recog_temp_ssa_var (stype, NULL);
1979 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
1980 NULL_TREE);
1981 if (ext_def)
1983 basic_block new_bb
1984 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1985 gcc_assert (!new_bb);
1987 else
1989 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1990 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1991 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1992 append_pattern_def_seq (stmt_vinfo, def_stmt);
1995 def2 = vect_recog_temp_ssa_var (stype, NULL);
1996 tree mask
1997 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1998 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1999 gimple_assign_lhs (def_stmt),
2000 mask);
2001 if (ext_def)
2003 basic_block new_bb
2004 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2005 gcc_assert (!new_bb);
2007 else
2009 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2010 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2011 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2012 append_pattern_def_seq (stmt_vinfo, def_stmt);
2016 var1 = vect_recog_temp_ssa_var (type, NULL);
2017 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
2018 ? LSHIFT_EXPR : RSHIFT_EXPR,
2019 var1, oprnd0, def);
2020 append_pattern_def_seq (stmt_vinfo, def_stmt);
2022 var2 = vect_recog_temp_ssa_var (type, NULL);
2023 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
2024 ? RSHIFT_EXPR : LSHIFT_EXPR,
2025 var2, oprnd0, def2);
2026 append_pattern_def_seq (stmt_vinfo, def_stmt);
2028 /* Pattern detected. */
2029 if (dump_enabled_p ())
2030 dump_printf_loc (MSG_NOTE, vect_location,
2031 "vect_recog_rotate_pattern: detected:\n");
2033 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2034 var = vect_recog_temp_ssa_var (type, NULL);
2035 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
2037 if (dump_enabled_p ())
2038 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2040 stmts->safe_push (last_stmt);
2041 return pattern_stmt;
2044 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2045 vectorized:
2047 type a_t;
2048 TYPE b_T, res_T;
2050 S1 a_t = ;
2051 S2 b_T = ;
2052 S3 res_T = b_T op a_t;
2054 where type 'TYPE' is a type with a different size than 'type',
2055 and op is <<, >> or rotate.
2057 Also detect cases:
2059 type a_t;
2060 TYPE b_T, c_T, res_T;
2062 S0 c_T = ;
2063 S1 a_t = (type) c_T;
2064 S2 b_T = ;
2065 S3 res_T = b_T op a_t;
2067 Input/Output:
2069 * STMTS: Contains a stmt from which the pattern search begins,
2070 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2071 with a shift/rotate which has the same type on both operands: in
2072 the second case just b_T op c_T, in the first case with an added
2073 cast from a_t to 'TYPE' in STMT_VINFO_PATTERN_DEF_SEQ.
2075 Output:
2077 * TYPE_IN: The type of the input arguments to the pattern.
2079 * TYPE_OUT: The type of the output of this pattern.
2081 * Return value: A new stmt that will be used to replace the shift/rotate
2082 S3 stmt. */
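/* An illustrative source-level sketch of the rewrite above (not part of
   GCC itself), assuming TYPE == unsigned int and type == unsigned char;
   shift_mixed, shift_same and a_it are hypothetical names:

     unsigned int
     shift_mixed (unsigned int b_T, unsigned char a_t)
     {
       return b_T << a_t;   // S3: operand types differ
     }

   is handled as if it read

     unsigned int
     shift_same (unsigned int b_T, unsigned char a_t)
     {
       unsigned int a_it = (unsigned int) a_t;   // cast added to the
                                                 // pattern def sequence
       return b_T << a_it;                       // same-type shift
     }
*/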
2084 static gimple
2085 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
2086 tree *type_in, tree *type_out)
2088 gimple last_stmt = stmts->pop ();
2089 tree oprnd0, oprnd1, lhs, var;
2090 gimple pattern_stmt, def_stmt;
2091 enum tree_code rhs_code;
2092 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2093 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2094 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2095 enum vect_def_type dt;
2096 tree def;
2098 if (!is_gimple_assign (last_stmt))
2099 return NULL;
2101 rhs_code = gimple_assign_rhs_code (last_stmt);
2102 switch (rhs_code)
2104 case LSHIFT_EXPR:
2105 case RSHIFT_EXPR:
2106 case LROTATE_EXPR:
2107 case RROTATE_EXPR:
2108 break;
2109 default:
2110 return NULL;
2113 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2114 return NULL;
2116 lhs = gimple_assign_lhs (last_stmt);
2117 oprnd0 = gimple_assign_rhs1 (last_stmt);
2118 oprnd1 = gimple_assign_rhs2 (last_stmt);
2119 if (TREE_CODE (oprnd0) != SSA_NAME
2120 || TREE_CODE (oprnd1) != SSA_NAME
2121 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2122 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2123 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2124 || TYPE_PRECISION (TREE_TYPE (lhs))
2125 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2126 return NULL;
2128 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
2129 &def, &dt))
2130 return NULL;
2132 if (dt != vect_internal_def)
2133 return NULL;
2135 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2136 *type_out = *type_in;
2137 if (*type_in == NULL_TREE)
2138 return NULL;
2140 def = NULL_TREE;
2141 if (gimple_assign_cast_p (def_stmt))
2143 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2144 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2145 && TYPE_PRECISION (TREE_TYPE (rhs1))
2146 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2147 def = rhs1;
2150 if (def == NULL_TREE)
2152 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2153 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
2154 NULL_TREE);
2155 new_pattern_def_seq (stmt_vinfo, def_stmt);
2158 /* Pattern detected. */
2159 if (dump_enabled_p ())
2160 dump_printf_loc (MSG_NOTE, vect_location,
2161 "vect_recog_vector_vector_shift_pattern: detected:\n");
2163 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2164 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2165 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
2167 if (dump_enabled_p ())
2168 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2170 stmts->safe_push (last_stmt);
2171 return pattern_stmt;
2174 /* Detect a signed division by a constant that wouldn't be
2175 otherwise vectorized:
2177 type a_t, b_t;
2179 S1 a_t = b_t / N;
2181 where type 'type' is an integral type and N is a constant.
2183 Similarly handle modulo by a constant:
2185 S4 a_t = b_t % N;
2187 Input/Output:
2189 * STMTS: Contains a stmt from which the pattern search begins,
2190 i.e. the division stmt. If N is a power of two constant and
2191 type is signed, S1 is replaced by:
2192 S3 y_t = b_t < 0 ? N - 1 : 0;
2193 S2 x_t = b_t + y_t;
2194 S1' a_t = x_t >> log2 (N);
2196 Under the same conditions, S4 is replaced by the following
2197 (where the *_T temporaries have unsigned type):
2198 S9 y_T = b_t < 0 ? -1U : 0U;
2199 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2200 S7 z_t = (type) z_T;
2201 S6 w_t = b_t + z_t;
2202 S5 x_t = w_t & (N - 1);
2203 S4' a_t = x_t - z_t;
2205 Output:
2207 * TYPE_IN: The type of the input arguments to the pattern.
2209 * TYPE_OUT: The type of the output of this pattern.
2211 * Return value: A new stmt that will be used to replace the division
2212 S1 or modulo S4 stmt. */
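/* A minimal worked example of the S1 replacement above (not part of GCC
   itself), for N == 8 and a 32-bit signed type, assuming arithmetic
   right shifts as in GIMPLE; sdiv8 is a hypothetical name:

     static inline int
     sdiv8 (int b)
     {
       int y = b < 0 ? 7 : 0;   // S3: N - 1
       int x = b + y;           // S2
       return x >> 3;           // S1': log2 (N)
     }

   E.g. sdiv8 (-9) computes (-9 + 7) >> 3 == -1, matching truncating
   division, whereas the plain shift -9 >> 3 would give -2.  The modulo
   replacement S9-S4' works the same way, masking with N - 1 instead of
   shifting.  */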
2214 static gimple
2215 vect_recog_divmod_pattern (vec<gimple> *stmts,
2216 tree *type_in, tree *type_out)
2218 gimple last_stmt = stmts->pop ();
2219 tree oprnd0, oprnd1, vectype, itype, cond;
2220 gimple pattern_stmt, def_stmt;
2221 enum tree_code rhs_code;
2222 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2223 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2225 optab optab;
2226 tree q;
2227 int dummy_int, prec;
2228 stmt_vec_info def_stmt_vinfo;
2230 if (!is_gimple_assign (last_stmt))
2231 return NULL;
2233 rhs_code = gimple_assign_rhs_code (last_stmt);
2234 switch (rhs_code)
2236 case TRUNC_DIV_EXPR:
2237 case TRUNC_MOD_EXPR:
2238 break;
2239 default:
2240 return NULL;
2243 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2244 return NULL;
2246 oprnd0 = gimple_assign_rhs1 (last_stmt);
2247 oprnd1 = gimple_assign_rhs2 (last_stmt);
2248 itype = TREE_TYPE (oprnd0);
2249 if (TREE_CODE (oprnd0) != SSA_NAME
2250 || TREE_CODE (oprnd1) != INTEGER_CST
2251 || TREE_CODE (itype) != INTEGER_TYPE
2252 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2253 return NULL;
2255 vectype = get_vectype_for_scalar_type (itype);
2256 if (vectype == NULL_TREE)
2257 return NULL;
2259 /* If the target can handle vectorized division or modulo natively,
2260 don't attempt to optimize this. */
2261 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2262 if (optab != unknown_optab)
2264 machine_mode vec_mode = TYPE_MODE (vectype);
2265 int icode = (int) optab_handler (optab, vec_mode);
2266 if (icode != CODE_FOR_nothing)
2267 return NULL;
2270 prec = TYPE_PRECISION (itype);
2271 if (integer_pow2p (oprnd1))
2273 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2274 return NULL;
2276 /* Pattern detected. */
2277 if (dump_enabled_p ())
2278 dump_printf_loc (MSG_NOTE, vect_location,
2279 "vect_recog_divmod_pattern: detected:\n");
2281 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2282 build_int_cst (itype, 0));
2283 if (rhs_code == TRUNC_DIV_EXPR)
2285 tree var = vect_recog_temp_ssa_var (itype, NULL);
2286 tree shift;
2287 def_stmt
2288 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2289 fold_build2 (MINUS_EXPR, itype,
2290 oprnd1,
2291 build_int_cst (itype,
2292 1)),
2293 build_int_cst (itype, 0));
2294 new_pattern_def_seq (stmt_vinfo, def_stmt);
2295 var = vect_recog_temp_ssa_var (itype, NULL);
2296 def_stmt
2297 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
2298 gimple_assign_lhs (def_stmt));
2299 append_pattern_def_seq (stmt_vinfo, def_stmt);
2301 shift = build_int_cst (itype, tree_log2 (oprnd1));
2302 pattern_stmt
2303 = gimple_build_assign_with_ops (RSHIFT_EXPR,
2304 vect_recog_temp_ssa_var (itype,
2305 NULL),
2306 var, shift);
2308 else
2310 tree signmask;
2311 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2312 if (compare_tree_int (oprnd1, 2) == 0)
2314 signmask = vect_recog_temp_ssa_var (itype, NULL);
2315 def_stmt
2316 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
2317 build_int_cst (itype, 1),
2318 build_int_cst (itype, 0));
2319 append_pattern_def_seq (stmt_vinfo, def_stmt);
2321 else
2323 tree utype
2324 = build_nonstandard_integer_type (prec, 1);
2325 tree vecutype = get_vectype_for_scalar_type (utype);
2326 tree shift
2327 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2328 - tree_log2 (oprnd1));
2329 tree var = vect_recog_temp_ssa_var (utype, NULL);
2331 def_stmt
2332 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2333 build_int_cst (utype, -1),
2334 build_int_cst (utype, 0));
2335 def_stmt_vinfo
2336 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2337 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2338 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2339 append_pattern_def_seq (stmt_vinfo, def_stmt);
2340 var = vect_recog_temp_ssa_var (utype, NULL);
2341 def_stmt
2342 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2343 gimple_assign_lhs (def_stmt),
2344 shift);
2345 def_stmt_vinfo
2346 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2347 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2348 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2349 append_pattern_def_seq (stmt_vinfo, def_stmt);
2350 signmask = vect_recog_temp_ssa_var (itype, NULL);
2351 def_stmt
2352 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2353 NULL_TREE);
2354 append_pattern_def_seq (stmt_vinfo, def_stmt);
2356 def_stmt
2357 = gimple_build_assign_with_ops (PLUS_EXPR,
2358 vect_recog_temp_ssa_var (itype,
2359 NULL),
2360 oprnd0, signmask);
2361 append_pattern_def_seq (stmt_vinfo, def_stmt);
2362 def_stmt
2363 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2364 vect_recog_temp_ssa_var (itype,
2365 NULL),
2366 gimple_assign_lhs (def_stmt),
2367 fold_build2 (MINUS_EXPR, itype,
2368 oprnd1,
2369 build_int_cst (itype,
2370 1)));
2371 append_pattern_def_seq (stmt_vinfo, def_stmt);
2373 pattern_stmt
2374 = gimple_build_assign_with_ops (MINUS_EXPR,
2375 vect_recog_temp_ssa_var (itype,
2376 NULL),
2377 gimple_assign_lhs (def_stmt),
2378 signmask);
2381 if (dump_enabled_p ())
2382 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2385 stmts->safe_push (last_stmt);
2387 *type_in = vectype;
2388 *type_out = vectype;
2389 return pattern_stmt;
2392 if (prec > HOST_BITS_PER_WIDE_INT
2393 || integer_zerop (oprnd1))
2394 return NULL;
2396 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2397 return NULL;
2399 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2401 if (TYPE_UNSIGNED (itype))
2403 unsigned HOST_WIDE_INT mh, ml;
2404 int pre_shift, post_shift;
2405 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2406 & GET_MODE_MASK (TYPE_MODE (itype)));
2407 tree t1, t2, t3, t4;
2409 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2410 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2411 return NULL;
2413 /* Find a suitable multiplier and right shift count
2414 instead of multiplying with D. */
2415 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2417 /* If the suggested multiplier is more than SIZE bits, we can do better
2418 for even divisors, using an initial right shift. */
2419 if (mh != 0 && (d & 1) == 0)
2421 pre_shift = floor_log2 (d & -d);
2422 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2423 &ml, &post_shift, &dummy_int);
2424 gcc_assert (!mh);
2426 else
2427 pre_shift = 0;
2429 if (mh != 0)
2431 if (post_shift - 1 >= prec)
2432 return NULL;
2434 /* t1 = oprnd0 h* ml;
2435 t2 = oprnd0 - t1;
2436 t3 = t2 >> 1;
2437 t4 = t1 + t3;
2438 q = t4 >> (post_shift - 1); */
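/* A worked instance of the recipe above (not part of GCC itself),
   assuming prec == 32 and d == 7, for which choose_multiplier is
   expected to yield ml == 0x24924925 and post_shift == 3:

     t1 = mulhi (n, 0x24924925);
     q = (((n - t1) >> 1) + t1) >> 2;

   e.g. n == 20 gives t1 == 2 and q == ((18 >> 1) + 2) >> 2 == 2.  */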
2439 t1 = vect_recog_temp_ssa_var (itype, NULL);
2440 def_stmt
2441 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2442 build_int_cst (itype, ml));
2443 append_pattern_def_seq (stmt_vinfo, def_stmt);
2445 t2 = vect_recog_temp_ssa_var (itype, NULL);
2446 def_stmt
2447 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2448 append_pattern_def_seq (stmt_vinfo, def_stmt);
2450 t3 = vect_recog_temp_ssa_var (itype, NULL);
2451 def_stmt
2452 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2453 integer_one_node);
2454 append_pattern_def_seq (stmt_vinfo, def_stmt);
2456 t4 = vect_recog_temp_ssa_var (itype, NULL);
2457 def_stmt
2458 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2460 if (post_shift != 1)
2462 append_pattern_def_seq (stmt_vinfo, def_stmt);
2464 q = vect_recog_temp_ssa_var (itype, NULL);
2465 pattern_stmt
2466 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2467 build_int_cst (itype,
2468 post_shift
2469 - 1));
2471 else
2473 q = t4;
2474 pattern_stmt = def_stmt;
2477 else
2479 if (pre_shift >= prec || post_shift >= prec)
2480 return NULL;
2482 /* t1 = oprnd0 >> pre_shift;
2483 t2 = t1 h* ml;
2484 q = t2 >> post_shift; */
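/* A worked instance of the pre-shift path (not part of GCC itself),
   assuming prec == 32 and d == 12: pre_shift == 2 and the remaining
   division by 3 is expected to use ml == 0xAAAAAAAB with
   post_shift == 1:

     t1 = n >> 2;
     q = mulhi (t1, 0xAAAAAAAB) >> 1;

   e.g. n == 35 gives t1 == 8, mulhi (8, 0xAAAAAAAB) == 5 and
   q == 2 == 35 / 12.  */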
2485 if (pre_shift)
2487 t1 = vect_recog_temp_ssa_var (itype, NULL);
2488 def_stmt
2489 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2490 build_int_cst (NULL,
2491 pre_shift));
2492 append_pattern_def_seq (stmt_vinfo, def_stmt);
2494 else
2495 t1 = oprnd0;
2497 t2 = vect_recog_temp_ssa_var (itype, NULL);
2498 def_stmt
2499 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2500 build_int_cst (itype, ml));
2502 if (post_shift)
2504 append_pattern_def_seq (stmt_vinfo, def_stmt);
2506 q = vect_recog_temp_ssa_var (itype, NULL);
2507 def_stmt
2508 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2509 build_int_cst (itype,
2510 post_shift));
2512 else
2513 q = t2;
2515 pattern_stmt = def_stmt;
2518 else
2520 unsigned HOST_WIDE_INT ml;
2521 int post_shift;
2522 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2523 unsigned HOST_WIDE_INT abs_d;
2524 bool add = false;
2525 tree t1, t2, t3, t4;
2527 /* Give up for -1. */
2528 if (d == -1)
2529 return NULL;
2531 /* Since d might be INT_MIN, we have to cast to
2532 unsigned HOST_WIDE_INT before negating to avoid
2533 undefined signed overflow. */
2534 abs_d = (d >= 0
2535 ? (unsigned HOST_WIDE_INT) d
2536 : - (unsigned HOST_WIDE_INT) d);
2538 /* n rem d = n rem -d */
2539 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2541 d = abs_d;
2542 oprnd1 = build_int_cst (itype, abs_d);
2544 else if (HOST_BITS_PER_WIDE_INT >= prec
2545 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2546 /* This case is not handled correctly below. */
2547 return NULL;
2549 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2550 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2552 add = true;
2553 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2555 if (post_shift >= prec)
2556 return NULL;
2558 /* t1 = oprnd0 h* ml; */
2559 t1 = vect_recog_temp_ssa_var (itype, NULL);
2560 def_stmt
2561 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2562 build_int_cst (itype, ml));
2564 if (add)
2566 /* t2 = t1 + oprnd0; */
2567 append_pattern_def_seq (stmt_vinfo, def_stmt);
2568 t2 = vect_recog_temp_ssa_var (itype, NULL);
2569 def_stmt
2570 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2572 else
2573 t2 = t1;
2575 if (post_shift)
2577 /* t3 = t2 >> post_shift; */
2578 append_pattern_def_seq (stmt_vinfo, def_stmt);
2579 t3 = vect_recog_temp_ssa_var (itype, NULL);
2580 def_stmt
2581 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2582 build_int_cst (itype, post_shift));
2584 else
2585 t3 = t2;
2587 wide_int oprnd0_min, oprnd0_max;
2588 int msb = 1;
2589 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2591 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2592 msb = 0;
2593 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2594 msb = -1;
2597 if (msb == 0 && d >= 0)
2599 /* q = t3; */
2600 q = t3;
2601 pattern_stmt = def_stmt;
2603 else
2605 /* t4 = oprnd0 >> (prec - 1);
2606 or if we know from VRP that oprnd0 >= 0
2607 t4 = 0;
2608 or if we know from VRP that oprnd0 < 0
2609 t4 = -1; */
2610 append_pattern_def_seq (stmt_vinfo, def_stmt);
2611 t4 = vect_recog_temp_ssa_var (itype, NULL);
2612 if (msb != 1)
2613 def_stmt
2614 = gimple_build_assign_with_ops (INTEGER_CST,
2615 t4, build_int_cst (itype, msb),
2616 NULL_TREE);
2617 else
2618 def_stmt
2619 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2620 build_int_cst (itype, prec - 1));
2621 append_pattern_def_seq (stmt_vinfo, def_stmt);
2623 /* q = t3 - t4; or q = t4 - t3; */
2624 q = vect_recog_temp_ssa_var (itype, NULL);
2625 pattern_stmt
2626 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2627 d < 0 ? t3 : t4);
2631 if (rhs_code == TRUNC_MOD_EXPR)
2633 tree r, t1;
2635 /* We divided. Now finish by:
2636 t1 = q * oprnd1;
2637 r = oprnd0 - t1; */
2638 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2640 t1 = vect_recog_temp_ssa_var (itype, NULL);
2641 def_stmt
2642 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2643 append_pattern_def_seq (stmt_vinfo, def_stmt);
2645 r = vect_recog_temp_ssa_var (itype, NULL);
2646 pattern_stmt
2647 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2650 /* Pattern detected. */
2651 if (dump_enabled_p ())
2653 dump_printf_loc (MSG_NOTE, vect_location,
2654 "vect_recog_divmod_pattern: detected: ");
2655 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2656 dump_printf (MSG_NOTE, "\n");
2659 stmts->safe_push (last_stmt);
2661 *type_in = vectype;
2662 *type_out = vectype;
2663 return pattern_stmt;
2666 /* Function vect_recog_mixed_size_cond_pattern
2668 Try to find the following pattern:
2670 type x_t, y_t;
2671 TYPE a_T, b_T, c_T;
2672 loop:
2673 S1 a_T = x_t CMP y_t ? b_T : c_T;
2675 where type 'TYPE' is an integral type which has a different size
2676 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2677 than 'type', the constants need to fit into an integer type
2678 with the same width as 'type') or results of conversion from 'type'.
2680 Input:
2682 * LAST_STMT: A stmt from which the pattern search begins.
2684 Output:
2686 * TYPE_IN: The type of the input arguments to the pattern.
2688 * TYPE_OUT: The type of the output of this pattern.
2690 * Return value: A new stmt that will be used to replace the pattern.
2691 Additionally a def_stmt is added.
2693 a_it = x_t CMP y_t ? b_it : c_it;
2694 a_T = (TYPE) a_it; */
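/* An illustrative source-level sketch of the above (not part of GCC
   itself), assuming type == short and TYPE == int; sel, sel_pat and
   a_it are hypothetical names:

     int
     sel (short x_t, short y_t)
     {
       return x_t < y_t ? 3 : 5;   // S1: 16-bit compare, 32-bit result
     }

   is rewritten so the COND_EXPR is computed in the comparison's width:

     int
     sel_pat (short x_t, short y_t)
     {
       short a_it = x_t < y_t ? 3 : 5;   // def_stmt in the narrower itype
       return (int) a_it;                // pattern stmt: widening cast
     }
*/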
2696 static gimple
2697 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2698 tree *type_out)
2700 gimple last_stmt = (*stmts)[0];
2701 tree cond_expr, then_clause, else_clause;
2702 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2703 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2704 machine_mode cmpmode;
2705 gimple pattern_stmt, def_stmt;
2706 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2707 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2708 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2709 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2710 bool promotion;
2711 tree comp_scalar_type;
2713 if (!is_gimple_assign (last_stmt)
2714 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2715 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2716 return NULL;
2718 cond_expr = gimple_assign_rhs1 (last_stmt);
2719 then_clause = gimple_assign_rhs2 (last_stmt);
2720 else_clause = gimple_assign_rhs3 (last_stmt);
2722 if (!COMPARISON_CLASS_P (cond_expr))
2723 return NULL;
2725 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2726 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2727 if (comp_vectype == NULL_TREE)
2728 return NULL;
2730 type = gimple_expr_type (last_stmt);
2731 if (types_compatible_p (type, comp_scalar_type)
2732 || ((TREE_CODE (then_clause) != INTEGER_CST
2733 || TREE_CODE (else_clause) != INTEGER_CST)
2734 && !INTEGRAL_TYPE_P (comp_scalar_type))
2735 || !INTEGRAL_TYPE_P (type))
2736 return NULL;
2738 if ((TREE_CODE (then_clause) != INTEGER_CST
2739 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2740 &def_stmt0, &promotion))
2741 || (TREE_CODE (else_clause) != INTEGER_CST
2742 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2743 &def_stmt1, &promotion)))
2744 return NULL;
2746 if (orig_type0 && orig_type1
2747 && !types_compatible_p (orig_type0, orig_type1))
2748 return NULL;
2750 if (orig_type0)
2752 if (!types_compatible_p (orig_type0, comp_scalar_type))
2753 return NULL;
2754 then_clause = gimple_assign_rhs1 (def_stmt0);
2755 itype = orig_type0;
2758 if (orig_type1)
2760 if (!types_compatible_p (orig_type1, comp_scalar_type))
2761 return NULL;
2762 else_clause = gimple_assign_rhs1 (def_stmt1);
2763 itype = orig_type1;
2766 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2768 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2769 return NULL;
2771 vectype = get_vectype_for_scalar_type (type);
2772 if (vectype == NULL_TREE)
2773 return NULL;
2775 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2776 return NULL;
2778 if (itype == NULL_TREE)
2779 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2780 TYPE_UNSIGNED (type));
2782 if (itype == NULL_TREE
2783 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2784 return NULL;
2786 vecitype = get_vectype_for_scalar_type (itype);
2787 if (vecitype == NULL_TREE)
2788 return NULL;
2790 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2791 return NULL;
2793 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2795 if ((TREE_CODE (then_clause) == INTEGER_CST
2796 && !int_fits_type_p (then_clause, itype))
2797 || (TREE_CODE (else_clause) == INTEGER_CST
2798 && !int_fits_type_p (else_clause, itype)))
2799 return NULL;
2802 def_stmt
2803 = gimple_build_assign_with_ops (COND_EXPR,
2804 vect_recog_temp_ssa_var (itype, NULL),
2805 unshare_expr (cond_expr),
2806 fold_convert (itype, then_clause),
2807 fold_convert (itype, else_clause));
2808 pattern_stmt
2809 = gimple_build_assign_with_ops (NOP_EXPR,
2810 vect_recog_temp_ssa_var (type, NULL),
2811 gimple_assign_lhs (def_stmt), NULL_TREE);
2813 new_pattern_def_seq (stmt_vinfo, def_stmt);
2814 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2815 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2816 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2817 *type_in = vecitype;
2818 *type_out = vectype;
2820 if (dump_enabled_p ())
2821 dump_printf_loc (MSG_NOTE, vect_location,
2822 "vect_recog_mixed_size_cond_pattern: detected:\n");
2824 return pattern_stmt;
2828 /* Helper function of vect_recog_bool_pattern. Called recursively; return
2829 true if bool VAR can be optimized that way. */
2831 static bool
2832 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2834 gimple def_stmt;
2835 enum vect_def_type dt;
2836 tree def, rhs1;
2837 enum tree_code rhs_code;
2839 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2840 &dt))
2841 return false;
2843 if (dt != vect_internal_def)
2844 return false;
2846 if (!is_gimple_assign (def_stmt))
2847 return false;
2849 if (!has_single_use (def))
2850 return false;
2852 rhs1 = gimple_assign_rhs1 (def_stmt);
2853 rhs_code = gimple_assign_rhs_code (def_stmt);
2854 switch (rhs_code)
2856 case SSA_NAME:
2857 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2859 CASE_CONVERT:
2860 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2861 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2862 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2863 return false;
2864 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2866 case BIT_NOT_EXPR:
2867 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2869 case BIT_AND_EXPR:
2870 case BIT_IOR_EXPR:
2871 case BIT_XOR_EXPR:
2872 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2873 return false;
2874 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2875 bb_vinfo);
2877 default:
2878 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2880 tree vecitype, comp_vectype;
2882 /* If the comparison can throw, then is_gimple_condexpr will be
2883 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2884 if (stmt_could_throw_p (def_stmt))
2885 return false;
2887 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2888 if (comp_vectype == NULL_TREE)
2889 return false;
2891 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2893 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2894 tree itype
2895 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2896 vecitype = get_vectype_for_scalar_type (itype);
2897 if (vecitype == NULL_TREE)
2898 return false;
2900 else
2901 vecitype = comp_vectype;
2902 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2904 return false;
2909 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2910 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2911 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2913 static tree
2914 adjust_bool_pattern_cast (tree type, tree var)
2916 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2917 gimple cast_stmt, pattern_stmt;
2919 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2920 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2921 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2922 cast_stmt
2923 = gimple_build_assign_with_ops (NOP_EXPR,
2924 vect_recog_temp_ssa_var (type, NULL),
2925 gimple_assign_lhs (pattern_stmt),
2926 NULL_TREE);
2927 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2928 return gimple_assign_lhs (cast_stmt);
2932 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2933 recursively. VAR is an SSA_NAME that should be transformed from bool
2934 to a wider integer type; OUT_TYPE is the desired final integer type of
2935 the whole pattern; TRUEVAL should be NULL unless optimizing
2936 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2937 in the then_clause; STMTS is where statements with added pattern stmts
2938 should be pushed. */
2940 static tree
2941 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2942 vec<gimple> *stmts)
2944 gimple stmt = SSA_NAME_DEF_STMT (var);
2945 enum tree_code rhs_code, def_rhs_code;
2946 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2947 location_t loc;
2948 gimple pattern_stmt, def_stmt;
2950 rhs1 = gimple_assign_rhs1 (stmt);
2951 rhs2 = gimple_assign_rhs2 (stmt);
2952 rhs_code = gimple_assign_rhs_code (stmt);
2953 loc = gimple_location (stmt);
2954 switch (rhs_code)
2956 case SSA_NAME:
2957 CASE_CONVERT:
2958 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2959 itype = TREE_TYPE (irhs1);
2960 pattern_stmt
2961 = gimple_build_assign_with_ops (SSA_NAME,
2962 vect_recog_temp_ssa_var (itype, NULL),
2963 irhs1, NULL_TREE);
2964 break;
2966 case BIT_NOT_EXPR:
2967 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2968 itype = TREE_TYPE (irhs1);
2969 pattern_stmt
2970 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2971 vect_recog_temp_ssa_var (itype, NULL),
2972 irhs1, build_int_cst (itype, 1));
2973 break;
2975 case BIT_AND_EXPR:
2976 /* Try to optimize x = y & (a < b ? 1 : 0); into
2977 x = (a < b ? y : 0);
2979 E.g. for:
2980 bool a_b, b_b, c_b;
2981 TYPE d_T;
2983 S1 a_b = x1 CMP1 y1;
2984 S2 b_b = x2 CMP2 y2;
2985 S3 c_b = a_b & b_b;
2986 S4 d_T = (TYPE) c_b;
2988 we would normally emit:
2990 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2991 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2992 S3' c_T = a_T & b_T;
2993 S4' d_T = c_T;
2995 but we can save one stmt by using the
2996 result of one of the COND_EXPRs in the other COND_EXPR and leave
2997 BIT_AND_EXPR stmt out:
2999 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3000 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3001 S4' d_T = c_T;
3003 At least when VEC_COND_EXPR is implemented using masks,
3004 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3005 computes the comparison mask and ands it, in one case with an
3006 all-ones vector, in the other case with a vector register.
3007 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3008 often more expensive. */
3009 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3010 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3011 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3013 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3014 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3015 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3016 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3018 gimple tstmt;
3019 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3020 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3021 tstmt = stmts->pop ();
3022 gcc_assert (tstmt == def_stmt);
3023 stmts->quick_push (stmt);
3024 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3025 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3026 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3027 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3028 return irhs2;
3030 else
3031 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3032 goto and_ior_xor;
3034 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3035 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3036 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3038 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3039 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3040 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3041 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3043 gimple tstmt;
3044 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3045 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3046 tstmt = stmts->pop ();
3047 gcc_assert (tstmt == def_stmt);
3048 stmts->quick_push (stmt);
3049 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3050 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3051 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3052 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3053 return irhs1;
3055 else
3056 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3057 goto and_ior_xor;
3059 /* FALLTHRU */
3060 case BIT_IOR_EXPR:
3061 case BIT_XOR_EXPR:
3062 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3063 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3064 and_ior_xor:
3065 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3066 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3068 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3069 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3070 int out_prec = TYPE_PRECISION (out_type);
3071 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3072 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3073 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3074 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3075 else
3077 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3078 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3081 itype = TREE_TYPE (irhs1);
3082 pattern_stmt
3083 = gimple_build_assign_with_ops (rhs_code,
3084 vect_recog_temp_ssa_var (itype, NULL),
3085 irhs1, irhs2);
3086 break;
3088 default:
3089 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3090 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3091 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3092 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3093 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3095 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3096 itype
3097 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3099 else
3100 itype = TREE_TYPE (rhs1);
3101 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3102 if (trueval == NULL_TREE)
3103 trueval = build_int_cst (itype, 1);
3104 else
3105 gcc_checking_assert (useless_type_conversion_p (itype,
3106 TREE_TYPE (trueval)));
3107 pattern_stmt
3108 = gimple_build_assign_with_ops (COND_EXPR,
3109 vect_recog_temp_ssa_var (itype, NULL),
3110 cond_expr, trueval,
3111 build_int_cst (itype, 0));
3112 break;
3115 stmts->safe_push (stmt);
3116 gimple_set_location (pattern_stmt, loc);
3117 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3118 return gimple_assign_lhs (pattern_stmt);
3122 /* Function vect_recog_bool_pattern
3124 Try to find pattern like following:
3126 bool a_b, b_b, c_b, d_b, e_b;
3127 TYPE f_T;
3128 loop:
3129 S1 a_b = x1 CMP1 y1;
3130 S2 b_b = x2 CMP2 y2;
3131 S3 c_b = a_b & b_b;
3132 S4 d_b = x3 CMP3 y3;
3133 S5 e_b = c_b | d_b;
3134 S6 f_T = (TYPE) e_b;
3136 where type 'TYPE' is an integral type. Or a similar pattern
3137 ending in
3139 S6 f_Y = e_b ? r_Y : s_Y;
3141 as results from if-conversion of a complex condition.
3143 Input:
3145 * LAST_STMT: A stmt at the end from which the pattern
3146 search begins, i.e. cast of a bool to
3147 an integer type.
3149 Output:
3151 * TYPE_IN: The type of the input arguments to the pattern.
3153 * TYPE_OUT: The type of the output of this pattern.
3155 * Return value: A new stmt that will be used to replace the pattern.
3157 Assuming the size of TYPE is the same as the size of all comparisons
3158 (otherwise some casts would be added where needed), for the above
3159 sequence we create the following related pattern stmts:
3160 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3161 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3162 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3163 S5' e_T = c_T | d_T;
3164 S6' f_T = e_T;
3166 Instead of the above S3' we could emit:
3167 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3168 S3' c_T = a_T | b_T;
3169 but the above is more efficient. */
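/* An illustrative source-level view of the S1-S6 sequence above (not
   part of GCC itself), assuming TYPE == int:

     _Bool a_b = x1 < y1;     // S1
     _Bool b_b = x2 < y2;     // S2
     _Bool c_b = a_b & b_b;   // S3
     _Bool d_b = x3 < y3;     // S4
     _Bool e_b = c_b | d_b;   // S5
     int f_T = (int) e_b;     // S6

   for which the pattern emits integer selects and bitwise ops:

     int a_T = x1 < y1 ? 1 : 0;     // S1'
     int c_T = x2 < y2 ? a_T : 0;   // S3': the & of S3 is folded in
     int d_T = x3 < y3 ? 1 : 0;     // S4'
     int e_T = c_T | d_T;           // S5'
     int f_T = e_T;                 // S6'
*/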
3171 static gimple
3172 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
3173 tree *type_out)
3175 gimple last_stmt = stmts->pop ();
3176 enum tree_code rhs_code;
3177 tree var, lhs, rhs, vectype;
3178 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3179 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3180 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
3181 gimple pattern_stmt;
3183 if (!is_gimple_assign (last_stmt))
3184 return NULL;
3186 var = gimple_assign_rhs1 (last_stmt);
3187 lhs = gimple_assign_lhs (last_stmt);
3189 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3190 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3191 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3192 return NULL;
3194 rhs_code = gimple_assign_rhs_code (last_stmt);
3195 if (CONVERT_EXPR_CODE_P (rhs_code))
3197 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3198 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3199 return NULL;
3200 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3201 if (vectype == NULL_TREE)
3202 return NULL;
3204 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3205 return NULL;
3207 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3208 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3209 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3210 pattern_stmt
3211 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
3212 else
3213 pattern_stmt
3214 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
3215 *type_out = vectype;
3216 *type_in = vectype;
3217 stmts->safe_push (last_stmt);
3218 if (dump_enabled_p ())
3219 dump_printf_loc (MSG_NOTE, vect_location,
3220 "vect_recog_bool_pattern: detected:\n");
3222 return pattern_stmt;
3224 else if (rhs_code == COND_EXPR
3225 && TREE_CODE (var) == SSA_NAME)
3227 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3228 if (vectype == NULL_TREE)
3229 return NULL;
3231 /* Build a scalar type for the boolean result that when
3232 vectorized matches the vector type of the result in
3233 size and number of elements. */
3234 unsigned prec
3235 = wi::udiv_trunc (TYPE_SIZE (vectype),
3236 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3237 tree type
3238 = build_nonstandard_integer_type (prec,
3239 TYPE_UNSIGNED (TREE_TYPE (var)));
3240 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3241 return NULL;
3243 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3244 return NULL;
3246 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3247 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3248 pattern_stmt
3249 = gimple_build_assign_with_ops (COND_EXPR, lhs,
3250 build2 (NE_EXPR, boolean_type_node,
3251 rhs, build_int_cst (type, 0)),
3252 gimple_assign_rhs2 (last_stmt),
3253 gimple_assign_rhs3 (last_stmt));
3254 *type_out = vectype;
3255 *type_in = vectype;
3256 stmts->safe_push (last_stmt);
3257 if (dump_enabled_p ())
3258 dump_printf_loc (MSG_NOTE, vect_location,
3259 "vect_recog_bool_pattern: detected:\n");
3261 return pattern_stmt;
3263 else if (rhs_code == SSA_NAME
3264 && STMT_VINFO_DATA_REF (stmt_vinfo))
3266 stmt_vec_info pattern_stmt_info;
3267 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3268 gcc_assert (vectype != NULL_TREE);
3269 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3270 return NULL;
3271 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3272 return NULL;
3274 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3275 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3276 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3278 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3279 gimple cast_stmt
3280 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
3281 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3282 rhs = rhs2;
3284 pattern_stmt
3285 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
3286 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3287 bb_vinfo);
3288 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3289 STMT_VINFO_DATA_REF (pattern_stmt_info)
3290 = STMT_VINFO_DATA_REF (stmt_vinfo);
3291 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3292 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3293 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3294 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3295 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3296 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3297 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3298 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3299 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3300 *type_out = vectype;
3301 *type_in = vectype;
3302 stmts->safe_push (last_stmt);
3303 if (dump_enabled_p ())
3304 dump_printf_loc (MSG_NOTE, vect_location,
3305 "vect_recog_bool_pattern: detected:\n");
3306 return pattern_stmt;
3308 else
3309 return NULL;
3313 /* Mark statements that are involved in a pattern. */
3315 static inline void
3316 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3317 tree pattern_vectype)
3319 stmt_vec_info pattern_stmt_info, def_stmt_info;
3320 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3321 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
3322 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
3323 gimple def_stmt;
3325 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3326 if (pattern_stmt_info == NULL)
3328 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3329 bb_vinfo);
3330 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3332 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3334 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3335 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3336 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3337 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3338 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3339 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3340 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3341 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3342 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3344 gimple_stmt_iterator si;
3345 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3346 !gsi_end_p (si); gsi_next (&si))
3348 def_stmt = gsi_stmt (si);
3349 def_stmt_info = vinfo_for_stmt (def_stmt);
3350 if (def_stmt_info == NULL)
3352 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3353 bb_vinfo);
3354 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3356 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3357 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3358 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3359 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3360 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3365 /* Function vect_pattern_recog_1
3367 Input:
3368 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3369 computation pattern.
3370 STMT: A stmt from which the pattern search should start.
3372 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3373 expression that computes the same functionality and can be used to
3374 replace the sequence of stmts that are involved in the pattern.
3376 Output:
3377 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3378 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3379 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3380 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3381 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
3382 matches the available target pattern.
3384 This function also does some bookkeeping, as explained in the documentation
3385 for vect_pattern_recog. */
3387 static void
3388 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3389 gimple_stmt_iterator si,
3390 vec<gimple> *stmts_to_replace)
3392 gimple stmt = gsi_stmt (si), pattern_stmt;
3393 stmt_vec_info stmt_info;
3394 loop_vec_info loop_vinfo;
3395 tree pattern_vectype;
3396 tree type_in, type_out;
3397 enum tree_code code;
3398 int i;
3399 gimple next;
3401 stmts_to_replace->truncate (0);
3402 stmts_to_replace->quick_push (stmt);
3403 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3404 if (!pattern_stmt)
3405 return;
3407 stmt = stmts_to_replace->last ();
3408 stmt_info = vinfo_for_stmt (stmt);
3409 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3411 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3413 /* No need to check target support (already checked by the pattern
3414 recognition function). */
3415 pattern_vectype = type_out ? type_out : type_in;
3417 else
3419 machine_mode vec_mode;
3420 enum insn_code icode;
3421 optab optab;
3423 /* Check target support */
3424 type_in = get_vectype_for_scalar_type (type_in);
3425 if (!type_in)
3426 return;
3427 if (type_out)
3428 type_out = get_vectype_for_scalar_type (type_out);
3429 else
3430 type_out = type_in;
3431 if (!type_out)
3432 return;
3433 pattern_vectype = type_out;
3435 if (is_gimple_assign (pattern_stmt))
3436 code = gimple_assign_rhs_code (pattern_stmt);
3437 else
3439 gcc_assert (is_gimple_call (pattern_stmt));
3440 code = CALL_EXPR;
3443 optab = optab_for_tree_code (code, type_in, optab_default);
3444 vec_mode = TYPE_MODE (type_in);
3445 if (!optab
3446 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3447 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3448 return;
3451 /* Found a vectorizable pattern. */
3452 if (dump_enabled_p ())
3454 dump_printf_loc (MSG_NOTE, vect_location,
3455 "pattern recognized: ");
3456 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3457 dump_printf (MSG_NOTE, "\n");
3460 /* Mark the stmts that are involved in the pattern. */
3461 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3463 /* Patterns cannot be vectorized using SLP, because they change the order of
3464 computation. */
3465 if (loop_vinfo)
3466 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3467 if (next == stmt)
3468 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3470 /* It is possible that additional pattern stmts are created and inserted in
3471 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3472 relevant statements. */
3473 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3474 && (unsigned) i < (stmts_to_replace->length () - 1);
3475 i++)
3477 stmt_info = vinfo_for_stmt (stmt);
3478 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3479 if (dump_enabled_p ())
3481 dump_printf_loc (MSG_NOTE, vect_location,
3482 "additional pattern stmt: ");
3483 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3484 dump_printf (MSG_NOTE, "\n");
3487 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3492 /* Function vect_pattern_recog
3494 Input:
3495 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
3496 computation idioms.
3498 Output - for each computation idiom that is detected we create a new stmt
3499 that provides the same functionality and that can be vectorized. We
3500 also record some information in the stmt_vec_info of the relevant
3501 stmts, as explained below:
3503 At the entry to this function we have the following stmts, with the
3504 following initial value in the STMT_VINFO fields:
3506 stmt in_pattern_p related_stmt vec_stmt
3507 S1: a_i = .... - - -
3508 S2: a_2 = ..use(a_i).. - - -
3509 S3: a_1 = ..use(a_2).. - - -
3510 S4: a_0 = ..use(a_1).. - - -
3511 S5: ... = ..use(a_0).. - - -
3513 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3514 represented by a single stmt. We then:
3515 - create a new stmt S6 equivalent to the pattern (the stmt is not
3516 inserted into the code)
3517 - fill in the STMT_VINFO fields as follows:
3519 in_pattern_p related_stmt vec_stmt
3520 S1: a_i = .... - - -
3521 S2: a_2 = ..use(a_i).. - - -
3522 S3: a_1 = ..use(a_2).. - - -
3523 S4: a_0 = ..use(a_1).. true S6 -
3524 '---> S6: a_new = .... - S4 -
3525 S5: ... = ..use(a_0).. - - -
3527 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3528 to each other through the RELATED_STMT field).
3530 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3531 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3532 remain irrelevant unless used by stmts other than S4.
3534 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3535 (because they are marked as irrelevant). It will vectorize S6, and record
3536 a pointer to the new vector stmt VS6 from S6 (as usual).
3537 S4 will be skipped, and S5 will be vectorized as usual:
3539 in_pattern_p related_stmt vec_stmt
3540 S1: a_i = .... - - -
3541 S2: a_2 = ..use(a_i).. - - -
3542 S3: a_1 = ..use(a_2).. - - -
3543 > VS6: va_new = .... - - -
3544 S4: a_0 = ..use(a_1).. true S6 VS6
3545 '---> S6: a_new = .... - S4 VS6
3546 > VS5: ... = ..vuse(va_new).. - - -
3547 S5: ... = ..use(a_0).. - - -
3549 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3550 elsewhere), and we'll end up with:
3552 VS6: va_new = ....
3553 VS5: ... = ..vuse(va_new)..
3555 In the case of more than one pattern statement, e.g., widen-mult with
3556 intermediate type:
3558 S1 a_t = ;
3559 S2 a_T = (TYPE) a_t;
3560 '--> S3: a_it = (interm_type) a_t;
3561 S4 prod_T = a_T * CONST;
3562 '--> S5: prod_T' = a_it w* CONST;
3564 there may be other users of a_T outside the pattern. In that case S2 will
3565 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3566 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3567 be recorded in S3. */
3569 void
3570 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3572 struct loop *loop;
3573 basic_block *bbs;
3574 unsigned int nbbs;
3575 gimple_stmt_iterator si;
3576 unsigned int i, j;
3577 vect_recog_func_ptr vect_recog_func;
3578 auto_vec<gimple, 1> stmts_to_replace;
3579 gimple stmt;
3581 if (dump_enabled_p ())
3582 dump_printf_loc (MSG_NOTE, vect_location,
3583 "=== vect_pattern_recog ===\n");
3585 if (loop_vinfo)
3587 loop = LOOP_VINFO_LOOP (loop_vinfo);
3588 bbs = LOOP_VINFO_BBS (loop_vinfo);
3589 nbbs = loop->num_nodes;
3591 else
3593 bbs = &BB_VINFO_BB (bb_vinfo);
3594 nbbs = 1;
3597 /* Scan through the loop stmts, applying the pattern recognition
3598 functions starting at each stmt visited: */
3599 for (i = 0; i < nbbs; i++)
3601 basic_block bb = bbs[i];
3602 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3604 if (bb_vinfo && (stmt = gsi_stmt (si))
3605 && vinfo_for_stmt (stmt)
3606 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3607 continue;
3609 /* Scan over all generic vect_recog_xxx_pattern functions. */
3610 for (j = 0; j < NUM_PATTERNS; j++)
3612 vect_recog_func = vect_vect_recog_func_ptrs[j];
3613 vect_pattern_recog_1 (vect_recog_func, si,
3614 &stmts_to_replace);