* expr.h (array_at_struct_end_p): Move to...
[official-gcc.git] / gcc / tree-vect-patterns.c
blob42002c464d0b995ad259a0be3f84fe0ba3e112ea
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "hash-set.h"
26 #include "machmode.h"
27 #include "vec.h"
28 #include "double-int.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "wide-int.h"
33 #include "inchash.h"
34 #include "tree.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "target.h"
38 #include "predict.h"
39 #include "hard-reg-set.h"
40 #include "function.h"
41 #include "dominance.h"
42 #include "basic-block.h"
43 #include "gimple-pretty-print.h"
44 #include "tree-ssa-alias.h"
45 #include "internal-fn.h"
46 #include "tree-eh.h"
47 #include "gimple-expr.h"
48 #include "is-a.h"
49 #include "gimple.h"
50 #include "gimplify.h"
51 #include "gimple-iterator.h"
52 #include "gimple-ssa.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
57 #include "cfgloop.h"
58 #include "hashtab.h"
59 #include "rtl.h"
60 #include "flags.h"
61 #include "statistics.h"
62 #include "real.h"
63 #include "fixed-value.h"
64 #include "insn-config.h"
65 #include "expmed.h"
66 #include "dojump.h"
67 #include "explow.h"
68 #include "calls.h"
69 #include "emit-rtl.h"
70 #include "varasm.h"
71 #include "stmt.h"
72 #include "expr.h"
73 #include "insn-codes.h"
74 #include "optabs.h"
75 #include "params.h"
76 #include "tree-data-ref.h"
77 #include "tree-vectorizer.h"
78 #include "recog.h" /* FIXME: for insn_data */
79 #include "diagnostic-core.h"
80 #include "dumpfile.h"
81 #include "builtins.h"
83 /* Pattern recognition functions */
84 static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
85 tree *);
86 static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
87 tree *);
88 static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
89 tree *);
90 static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
91 tree *);
92 static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
93 static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
94 tree *);
95 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
96 tree *, tree *);
97 static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
98 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
99 tree *, tree *);
100 static gimple vect_recog_divmod_pattern (vec<gimple> *,
101 tree *, tree *);
102 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
103 tree *, tree *);
104 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
/* Dispatch table of pattern-recognition routines tried by the
   vectorizer's pattern pass.  The entry count must equal NUM_PATTERNS
   (declared elsewhere in the project).
   NOTE(review): this dump lost brace/blank lines; the left-hand numbers
   are the upstream file's own line numbers.  */
105 static vect_recog_func_ptrs vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
106 vect_recog_widen_mult_pattern,
107 vect_recog_widen_sum_pattern,
108 vect_recog_dot_prod_pattern,
109 vect_recog_sad_pattern,
110 vect_recog_pow_pattern,
111 vect_recog_widen_shift_pattern,
112 vect_recog_over_widening_pattern,
113 vect_recog_rotate_pattern,
114 vect_recog_vector_vector_shift_pattern,
115 vect_recog_divmod_pattern,
116 vect_recog_mixed_size_cond_pattern,
117 vect_recog_bool_pattern};
/* Append STMT to STMT_INFO's pattern definition sequence.  Uses the
   "without_update" variant, so SSA operands are not rescanned here.  */
119 static inline void
120 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
122 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
123 stmt);
/* Discard any existing pattern definition sequence of STMT_INFO and
   start a fresh one containing only STMT.  */
126 static inline void
127 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
129 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
130 append_pattern_def_seq (stmt_info, stmt);
133 /* Check whether STMT2 is in the same loop or basic block as STMT1.
134 Which of the two applies depends on whether we're currently doing
135 loop-based or basic-block-based vectorization, as determined by
136 the vinfo_for_stmt for STMT1 (which must be defined).
138 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
139 to be defined as well. */
141 static bool
142 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
144 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
145 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
146 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
/* STMT2 must live in some basic block at all.  */
148 if (!gimple_bb (stmt2))
149 return false;
/* Loop-based vectorization: STMT2 must be inside the loop.  */
151 if (loop_vinfo)
153 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
154 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
155 return false;
/* Basic-block vectorization: STMT2 must be in the same BB, and PHIs
   are rejected.  */
157 else
159 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
160 || gimple_code (stmt2) == GIMPLE_PHI)
161 return false;
164 gcc_assert (vinfo_for_stmt (stmt2));
165 return true;
168 /* If the LHS of DEF_STMT has a single use, and that statement is
169 in the same loop or basic block, return it. */
171 static gimple
172 vect_single_imm_use (gimple def_stmt)
174 tree lhs = gimple_assign_lhs (def_stmt);
175 use_operand_p use_p;
176 gimple use_stmt;
/* Require exactly one immediate use of the LHS.  */
178 if (!single_imm_use (lhs, &use_p, &use_stmt))
179 return NULL;
/* The use must be within the region currently being vectorized.  */
181 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
182 return NULL;
184 return use_stmt;
187 /* Check whether NAME, an ssa-name used in USE_STMT,
188 is a result of a type promotion, such that:
189 DEF_STMT: NAME = NOP (name0)
190 If CHECK_SIGN is TRUE, check that either both types are signed or both are
191 unsigned. */
193 static bool
194 type_conversion_p (tree name, gimple use_stmt, bool check_sign,
195 tree *orig_type, gimple *def_stmt, bool *promotion)
197 tree dummy;
198 gimple dummy_gimple;
199 loop_vec_info loop_vinfo;
200 stmt_vec_info stmt_vinfo;
201 tree type = TREE_TYPE (name);
202 tree oprnd0;
203 enum vect_def_type dt;
204 tree def;
205 bb_vec_info bb_vinfo;
207 stmt_vinfo = vinfo_for_stmt (use_stmt);
208 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
209 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
/* NAME must have a vectorizable definition; this also fills *DEF_STMT.  */
210 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
211 &def, &dt))
212 return false;
214 if (dt != vect_internal_def
215 && dt != vect_external_def && dt != vect_constant_def)
216 return false;
218 if (!*def_stmt)
219 return false;
221 if (!is_gimple_assign (*def_stmt))
222 return false;
/* The defining statement must be a conversion (NOP/CONVERT).  */
224 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
225 return false;
227 oprnd0 = gimple_assign_rhs1 (*def_stmt);
229 *orig_type = TREE_TYPE (oprnd0);
230 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
231 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
232 return false;
/* Count as a promotion only when the precision at least doubles.  */
234 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
235 *promotion = true;
236 else
237 *promotion = false;
/* The converted operand itself must also be a simple use.  */
239 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
240 bb_vinfo, &dummy_gimple, &dummy, &dt))
241 return false;
243 return true;
246 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
247 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
249 static tree
250 vect_recog_temp_ssa_var (tree type, gimple stmt)
/* All pattern temporaries share the "patt" name prefix.  */
252 return make_temp_ssa_name (type, stmt, "patt");
255 /* Function vect_recog_dot_prod_pattern
257 Try to find the following pattern:
259 type x_t, y_t;
260 TYPE1 prod;
261 TYPE2 sum = init;
262 loop:
263 sum_0 = phi <init, sum_1>
264 S1 x_t = ...
265 S2 y_t = ...
266 S3 x_T = (TYPE1) x_t;
267 S4 y_T = (TYPE1) y_t;
268 S5 prod = x_T * y_T;
269 [S6 prod = (TYPE2) prod; #optional]
270 S7 sum_1 = prod + sum_0;
272 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
273 same size of 'TYPE1' or bigger. This is a special case of a reduction
274 computation.
276 Input:
278 * STMTS: Contains a stmt from which the pattern search begins. In the
279 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
280 will be detected.
282 Output:
284 * TYPE_IN: The type of the input arguments to the pattern.
286 * TYPE_OUT: The type of the output of this pattern.
288 * Return value: A new stmt that will be used to replace the sequence of
289 stmts that constitute the pattern. In this case it will be:
290 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
292 Note: The dot-prod idiom is a widening reduction pattern that is
293 vectorized without preserving all the intermediate results. It
294 produces only N/2 (widened) results (by summing up pairs of
295 intermediate results) rather than all N results. Therefore, we
296 cannot allow this pattern when we want to get all the results and in
297 the correct order (as is the case when this computation is in an
298 inner-loop nested in an outer-loop that us being vectorized). */
300 static gimple
301 vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
302 tree *type_out)
304 gimple stmt, last_stmt = (*stmts)[0];
305 tree oprnd0, oprnd1;
306 tree oprnd00, oprnd01;
307 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
308 tree type, half_type;
309 gimple pattern_stmt;
310 tree prod_type;
311 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
312 struct loop *loop;
313 tree var;
314 bool promotion;
/* Dot-product is a reduction idiom, so loop context is required.  */
316 if (!loop_info)
317 return NULL;
319 loop = LOOP_VINFO_LOOP (loop_info);
321 /* We don't allow changing the order of the computation in the inner-loop
322 when doing outer-loop vectorization. */
323 if (loop && nested_in_vect_loop_p (loop, last_stmt))
324 return NULL;
326 if (!is_gimple_assign (last_stmt))
327 return NULL;
329 type = gimple_expr_type (last_stmt);
331 /* Look for the following pattern
332 DX = (TYPE1) X;
333 DY = (TYPE1) Y;
334 DPROD = DX * DY;
335 DDPROD = (TYPE2) DPROD;
336 sum_1 = DDPROD + sum_0;
337 In which
338 - DX is double the size of X
339 - DY is double the size of Y
340 - DX, DY, DPROD all have the same type
341 - sum is the same size of DPROD or bigger
342 - sum has been recognized as a reduction variable.
344 This is equivalent to:
345 DPROD = X w* Y; #widen mult
346 sum_1 = DPROD w+ sum_0; #widen summation
348 DPROD = X w* Y; #widen mult
349 sum_1 = DPROD + sum_0; #summation
352 /* Starting from LAST_STMT, follow the defs of its uses in search
353 of the above pattern. */
355 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
356 return NULL;
358 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
360 /* Has been detected as widening-summation? */
362 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
363 type = gimple_expr_type (stmt);
364 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
365 return NULL;
366 oprnd0 = gimple_assign_rhs1 (stmt);
367 oprnd1 = gimple_assign_rhs2 (stmt);
368 half_type = TREE_TYPE (oprnd0);
370 else
372 gimple def_stmt;
374 oprnd0 = gimple_assign_rhs1 (last_stmt);
375 oprnd1 = gimple_assign_rhs2 (last_stmt);
376 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
377 || !types_compatible_p (TREE_TYPE (oprnd1), type))
378 return NULL;
379 stmt = last_stmt;
/* Peel off an optional promotion (S6) feeding the summation.  */
381 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
382 &promotion)
383 && promotion)
385 stmt = def_stmt;
386 oprnd0 = gimple_assign_rhs1 (stmt);
388 else
389 half_type = type;
392 /* So far so good. Since last_stmt was detected as a (summation) reduction,
393 we know that oprnd1 is the reduction variable (defined by a loop-header
394 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
395 Left to check that oprnd0 is defined by a (widen_)mult_expr */
396 if (TREE_CODE (oprnd0) != SSA_NAME)
397 return NULL;
399 prod_type = half_type;
400 stmt = SSA_NAME_DEF_STMT (oprnd0);
402 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
403 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
404 return NULL;
406 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
407 inside the loop (in case we are analyzing an outer-loop). */
408 if (!is_gimple_assign (stmt))
409 return NULL;
410 stmt_vinfo = vinfo_for_stmt (stmt);
411 gcc_assert (stmt_vinfo);
412 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
413 return NULL;
414 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
415 return NULL;
416 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
418 /* Has been detected as a widening multiplication? */
420 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
421 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
422 return NULL;
423 stmt_vinfo = vinfo_for_stmt (stmt);
424 gcc_assert (stmt_vinfo);
425 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
426 oprnd00 = gimple_assign_rhs1 (stmt);
427 oprnd01 = gimple_assign_rhs2 (stmt);
/* Inherit the widen-mult pattern's def sequence on the summation.  */
428 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
429 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
431 else
433 tree half_type0, half_type1;
434 gimple def_stmt;
435 tree oprnd0, oprnd1;
437 oprnd0 = gimple_assign_rhs1 (stmt);
438 oprnd1 = gimple_assign_rhs2 (stmt);
439 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
440 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
441 return NULL;
442 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
443 &promotion)
444 || !promotion)
445 return NULL;
446 oprnd00 = gimple_assign_rhs1 (def_stmt);
447 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
448 &promotion)
449 || !promotion)
450 return NULL;
451 oprnd01 = gimple_assign_rhs1 (def_stmt);
452 if (!types_compatible_p (half_type0, half_type1))
453 return NULL;
/* The multiplication operands must be exactly half the width of the
   product for a dot-product.  */
454 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
455 return NULL;
458 half_type = TREE_TYPE (oprnd00);
459 *type_in = half_type;
460 *type_out = type;
462 /* Pattern detected. Create a stmt to be used to replace the pattern: */
463 var = vect_recog_temp_ssa_var (type, NULL);
464 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
465 oprnd00, oprnd01, oprnd1);
467 if (dump_enabled_p ())
469 dump_printf_loc (MSG_NOTE, vect_location,
470 "vect_recog_dot_prod_pattern: detected: ");
471 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
472 dump_printf (MSG_NOTE, "\n");
475 return pattern_stmt;
479 /* Function vect_recog_sad_pattern
481 Try to find the following Sum of Absolute Difference (SAD) pattern:
483 type x_t, y_t;
484 signed TYPE1 diff, abs_diff;
485 TYPE2 sum = init;
486 loop:
487 sum_0 = phi <init, sum_1>
488 S1 x_t = ...
489 S2 y_t = ...
490 S3 x_T = (TYPE1) x_t;
491 S4 y_T = (TYPE1) y_t;
492 S5 diff = x_T - y_T;
493 S6 abs_diff = ABS_EXPR <diff>;
494 [S7 abs_diff = (TYPE2) abs_diff; #optional]
495 S8 sum_1 = abs_diff + sum_0;
497 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
498 same size of 'TYPE1' or bigger. This is a special case of a reduction
499 computation.
501 Input:
503 * STMTS: Contains a stmt from which the pattern search begins. In the
504 example, when this function is called with S8, the pattern
505 {S3,S4,S5,S6,S7,S8} will be detected.
507 Output:
509 * TYPE_IN: The type of the input arguments to the pattern.
511 * TYPE_OUT: The type of the output of this pattern.
513 * Return value: A new stmt that will be used to replace the sequence of
514 stmts that constitute the pattern. In this case it will be:
515 SAD_EXPR <x_t, y_t, sum_0>
518 static gimple
519 vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
520 tree *type_out)
522 gimple last_stmt = (*stmts)[0];
523 tree sad_oprnd0, sad_oprnd1;
524 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
525 tree half_type;
526 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
527 struct loop *loop;
528 bool promotion;
/* SAD is a reduction idiom, so loop context is required.  */
530 if (!loop_info)
531 return NULL;
533 loop = LOOP_VINFO_LOOP (loop_info);
535 /* We don't allow changing the order of the computation in the inner-loop
536 when doing outer-loop vectorization. */
537 if (loop && nested_in_vect_loop_p (loop, last_stmt))
538 return NULL;
540 if (!is_gimple_assign (last_stmt))
541 return NULL;
543 tree sum_type = gimple_expr_type (last_stmt);
545 /* Look for the following pattern
546 DX = (TYPE1) X;
547 DY = (TYPE1) Y;
548 DDIFF = DX - DY;
549 DAD = ABS_EXPR <DDIFF>;
550 DDPROD = (TYPE2) DPROD;
551 sum_1 = DAD + sum_0;
552 In which
553 - DX is at least double the size of X
554 - DY is at least double the size of Y
555 - DX, DY, DDIFF, DAD all have the same type
556 - sum is the same size of DAD or bigger
557 - sum has been recognized as a reduction variable.
559 This is equivalent to:
560 DDIFF = X w- Y; #widen sub
561 DAD = ABS_EXPR <DDIFF>;
562 sum_1 = DAD w+ sum_0; #widen summation
564 DDIFF = X w- Y; #widen sub
565 DAD = ABS_EXPR <DDIFF>;
566 sum_1 = DAD + sum_0; #summation
569 /* Starting from LAST_STMT, follow the defs of its uses in search
570 of the above pattern. */
572 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
573 return NULL;
575 tree plus_oprnd0, plus_oprnd1;
577 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
579 /* Has been detected as widening-summation? */
581 gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
582 sum_type = gimple_expr_type (stmt);
583 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
584 return NULL;
585 plus_oprnd0 = gimple_assign_rhs1 (stmt);
586 plus_oprnd1 = gimple_assign_rhs2 (stmt);
587 half_type = TREE_TYPE (plus_oprnd0);
589 else
591 gimple def_stmt;
593 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
594 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
595 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
596 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
597 return NULL;
599 /* The type conversion could be promotion, demotion,
600 or just signed -> unsigned. */
601 if (type_conversion_p (plus_oprnd0, last_stmt, false,
602 &half_type, &def_stmt, &promotion))
603 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
604 else
605 half_type = sum_type;
608 /* So far so good. Since last_stmt was detected as a (summation) reduction,
609 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
610 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
611 Then check that plus_oprnd0 is defined by an abs_expr. */
613 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
614 return NULL;
616 tree abs_type = half_type;
617 gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
619 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
620 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
621 return NULL;
623 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
624 inside the loop (in case we are analyzing an outer-loop). */
625 if (!is_gimple_assign (abs_stmt))
626 return NULL;
628 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
629 gcc_assert (abs_stmt_vinfo);
630 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
631 return NULL;
632 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
633 return NULL;
635 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
636 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
637 return NULL;
/* The ABS operand type must be signed.  */
638 if (TYPE_UNSIGNED (abs_type))
639 return NULL;
641 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
643 if (TREE_CODE (abs_oprnd) != SSA_NAME)
644 return NULL;
646 gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
648 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
649 if (!gimple_bb (diff_stmt)
650 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
651 return NULL;
653 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
654 inside the loop (in case we are analyzing an outer-loop). */
655 if (!is_gimple_assign (diff_stmt))
656 return NULL;
658 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
659 gcc_assert (diff_stmt_vinfo);
660 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
661 return NULL;
662 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
663 return NULL;
665 tree half_type0, half_type1;
666 gimple def_stmt;
668 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
669 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
671 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
672 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
673 return NULL;
/* Both subtraction operands must come from promotions of the same
   narrow type — those narrow values become the SAD inputs.  */
674 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
675 &half_type0, &def_stmt, &promotion)
676 || !promotion)
677 return NULL;
678 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
680 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
681 &half_type1, &def_stmt, &promotion)
682 || !promotion)
683 return NULL;
684 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
686 if (!types_compatible_p (half_type0, half_type1))
687 return NULL;
688 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
689 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
690 return NULL;
692 *type_in = TREE_TYPE (sad_oprnd0);
693 *type_out = sum_type;
695 /* Pattern detected. Create a stmt to be used to replace the pattern: */
696 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
697 gimple pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
698 sad_oprnd1, plus_oprnd1);
700 if (dump_enabled_p ())
702 dump_printf_loc (MSG_NOTE, vect_location,
703 "vect_recog_sad_pattern: detected: ");
704 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
705 dump_printf (MSG_NOTE, "\n");
708 return pattern_stmt;
712 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
713 and LSHIFT_EXPR.
715 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
716 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
718 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
719 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
720 that satisfies the above restrictions, we can perform a widening opeartion
721 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
722 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
724 static bool
725 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
726 tree const_oprnd, tree *oprnd,
727 gimple *wstmt, tree type,
728 tree *half_type, gimple def_stmt)
730 tree new_type, new_oprnd;
732 if (code != MULT_EXPR && code != LSHIFT_EXPR)
733 return false;
/* Easy case: the constant already fits HALF_TYPE (or the shift count
   does not exceed its precision) and TYPE is exactly twice as wide.  */
735 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
736 || (code == LSHIFT_EXPR
737 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
738 != 1))
739 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
741 /* CONST_OPRND is a constant of HALF_TYPE. */
742 *oprnd = gimple_assign_rhs1 (def_stmt);
743 return true;
/* Fall back: only possible when TYPE is at least 4x HALF_TYPE, so an
   intermediate type of TYPE/2 bits can be tried.  */
746 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
747 return false;
749 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
750 return false;
752 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
753 a type 2 times bigger than HALF_TYPE. */
754 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
755 TYPE_UNSIGNED (type));
756 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
757 || (code == LSHIFT_EXPR
758 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
759 return false;
761 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
762 *oprnd = gimple_assign_rhs1 (def_stmt);
763 new_oprnd = make_ssa_name (new_type);
764 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
765 *oprnd = new_oprnd;
767 *half_type = new_type;
768 return true;
772 /* Function vect_recog_widen_mult_pattern
774 Try to find the following pattern:
776 type1 a_t;
777 type2 b_t;
778 TYPE a_T, b_T, prod_T;
780 S1 a_t = ;
781 S2 b_t = ;
782 S3 a_T = (TYPE) a_t;
783 S4 b_T = (TYPE) b_t;
784 S5 prod_T = a_T * b_T;
786 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
788 Also detect unsigned cases:
790 unsigned type1 a_t;
791 unsigned type2 b_t;
792 unsigned TYPE u_prod_T;
793 TYPE a_T, b_T, prod_T;
795 S1 a_t = ;
796 S2 b_t = ;
797 S3 a_T = (TYPE) a_t;
798 S4 b_T = (TYPE) b_t;
799 S5 prod_T = a_T * b_T;
800 S6 u_prod_T = (unsigned TYPE) prod_T;
802 and multiplication by constants:
804 type a_t;
805 TYPE a_T, prod_T;
807 S1 a_t = ;
808 S3 a_T = (TYPE) a_t;
809 S5 prod_T = a_T * CONST;
811 A special case of multiplication by constants is when 'TYPE' is 4 times
812 bigger than 'type', but CONST fits an intermediate type 2 times smaller
813 than 'TYPE'. In that case we create an additional pattern stmt for S3
814 to create a variable of the intermediate type, and perform widen-mult
815 on the intermediate type as well:
817 type a_t;
818 interm_type a_it;
819 TYPE a_T, prod_T, prod_T';
821 S1 a_t = ;
822 S3 a_T = (TYPE) a_t;
823 '--> a_it = (interm_type) a_t;
824 S5 prod_T = a_T * CONST;
825 '--> prod_T' = a_it w* CONST;
827 Input/Output:
829 * STMTS: Contains a stmt from which the pattern search begins. In the
830 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
831 is detected. In case of unsigned widen-mult, the original stmt (S5) is
832 replaced with S6 in STMTS. In case of multiplication by a constant
833 of an intermediate type (the last case above), STMTS also contains S3
834 (inserted before S5).
836 Output:
838 * TYPE_IN: The type of the input arguments to the pattern.
840 * TYPE_OUT: The type of the output of this pattern.
842 * Return value: A new stmt that will be used to replace the sequence of
843 stmts that constitute the pattern. In this case it will be:
844 WIDEN_MULT <a_t, b_t>
845 If the result of WIDEN_MULT needs to be converted to a larger type, the
846 returned stmt will be this type conversion stmt.
849 static gimple
850 vect_recog_widen_mult_pattern (vec<gimple> *stmts,
851 tree *type_in, tree *type_out)
853 gimple last_stmt = stmts->pop ();
854 gimple def_stmt0, def_stmt1;
855 tree oprnd0, oprnd1;
856 tree type, half_type0, half_type1;
857 gimple new_stmt = NULL, pattern_stmt = NULL;
858 tree vectype, vecitype;
859 tree var;
860 enum tree_code dummy_code;
861 int dummy_int;
862 vec<tree> dummy_vec;
863 bool op1_ok;
864 bool promotion;
866 if (!is_gimple_assign (last_stmt))
867 return NULL;
869 type = gimple_expr_type (last_stmt);
871 /* Starting from LAST_STMT, follow the defs of its uses in search
872 of the above pattern. */
874 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
875 return NULL;
877 oprnd0 = gimple_assign_rhs1 (last_stmt);
878 oprnd1 = gimple_assign_rhs2 (last_stmt);
879 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
880 || !types_compatible_p (TREE_TYPE (oprnd1), type))
881 return NULL;
883 /* Check argument 0. */
884 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
885 &promotion)
886 || !promotion)
887 return NULL;
888 /* Check argument 1. */
889 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
890 &def_stmt1, &promotion);
/* Both operands promoted: use the narrow sources directly.  */
892 if (op1_ok && promotion)
894 oprnd0 = gimple_assign_rhs1 (def_stmt0);
895 oprnd1 = gimple_assign_rhs1 (def_stmt1);
/* Otherwise operand 1 must be an integer constant handled via
   vect_handle_widen_op_by_const.  */
897 else
899 if (TREE_CODE (oprnd1) == INTEGER_CST
900 && TREE_CODE (half_type0) == INTEGER_TYPE
901 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
902 &oprnd0, &new_stmt, type,
903 &half_type0, def_stmt0))
905 half_type1 = half_type0;
906 oprnd1 = fold_convert (half_type1, oprnd1);
908 else
909 return NULL;
912 /* If the two arguments have different sizes, convert the one with
913 the smaller type into the larger type. */
914 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
916 /* If we already used up the single-stmt slot give up. */
917 if (new_stmt)
918 return NULL;
920 tree* oprnd = NULL;
921 gimple def_stmt = NULL;
923 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
925 def_stmt = def_stmt0;
926 half_type0 = half_type1;
927 oprnd = &oprnd0;
929 else
931 def_stmt = def_stmt1;
932 half_type1 = half_type0;
933 oprnd = &oprnd1;
936 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
937 tree new_oprnd = make_ssa_name (half_type0);
938 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
939 *oprnd = new_oprnd;
942 /* Handle unsigned case. Look for
943 S6 u_prod_T = (unsigned TYPE) prod_T;
944 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
945 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
947 gimple use_stmt;
948 tree use_lhs;
949 tree use_type;
951 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
952 return NULL;
/* The product must have a single use that is a sign-changing
   conversion of the same precision.  */
954 use_stmt = vect_single_imm_use (last_stmt);
955 if (!use_stmt || !is_gimple_assign (use_stmt)
956 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
957 return NULL;
959 use_lhs = gimple_assign_lhs (use_stmt);
960 use_type = TREE_TYPE (use_lhs);
961 if (!INTEGRAL_TYPE_P (use_type)
962 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
963 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
964 return NULL;
966 type = use_type;
967 last_stmt = use_stmt;
970 if (!types_compatible_p (half_type0, half_type1))
971 return NULL;
973 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
974 to get an intermediate result of type ITYPE. In this case we need
975 to build a statement to convert this intermediate result to type TYPE. */
976 tree itype = type;
977 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
978 itype = build_nonstandard_integer_type
979 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
980 TYPE_UNSIGNED (type));
982 /* Pattern detected. */
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE, vect_location,
985 "vect_recog_widen_mult_pattern: detected:\n");
987 /* Check target support */
988 vectype = get_vectype_for_scalar_type (half_type0);
989 vecitype = get_vectype_for_scalar_type (itype);
990 if (!vectype
991 || !vecitype
992 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
993 vecitype, vectype,
994 &dummy_code, &dummy_code,
995 &dummy_int, &dummy_vec))
996 return NULL;
998 *type_in = vectype;
999 *type_out = get_vectype_for_scalar_type (type);
1001 /* Pattern supported. Create a stmt to be used to replace the pattern: */
1002 var = vect_recog_temp_ssa_var (itype, NULL);
1003 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
1005 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1006 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1007 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1008 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1010 /* If the original two operands have different sizes, we may need to convert
1011 the smaller one into the larget type. If this is the case, at this point
1012 the new stmt is already built. */
1013 if (new_stmt)
1015 append_pattern_def_seq (stmt_vinfo, new_stmt);
1016 stmt_vec_info new_stmt_info
1017 = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
1018 set_vinfo_for_stmt (new_stmt, new_stmt_info);
1019 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1022 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
1023 the result of the widen-mult operation into type TYPE. */
1024 if (itype != type)
1026 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1027 stmt_vec_info pattern_stmt_info
1028 = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
1029 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1030 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
1031 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
1032 NOP_EXPR,
1033 gimple_assign_lhs (pattern_stmt));
1036 if (dump_enabled_p ())
1037 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1039 stmts->safe_push (last_stmt);
1040 return pattern_stmt;
1044 /* Function vect_recog_pow_pattern
1046 Try to find the following pattern:
1048 x = POW (y, N);
1050 with POW being one of pow, powf, powi, powif and N being
1051 either 2 or 0.5.
1053 Input:
1055 * LAST_STMT: A stmt from which the pattern search begins.
1057 Output:
1059 * TYPE_IN: The type of the input arguments to the pattern.
1061 * TYPE_OUT: The type of the output of this pattern.
1063 * Return value: A new stmt that will be used to replace the sequence of
1064 stmts that constitute the pattern. In this case it will be:
1065 x = x * x
1067 x = sqrt (x)
1070 static gimple
1071 vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
1072 tree *type_out)
1074 gimple last_stmt = (*stmts)[0];
1075 tree fn, base, exp = NULL;
1076 gimple stmt;
1077 tree var;
1079 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1080 return NULL;
1082 fn = gimple_call_fndecl (last_stmt);
1083 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
1084 return NULL;
1086 switch (DECL_FUNCTION_CODE (fn))
1088 case BUILT_IN_POWIF:
1089 case BUILT_IN_POWI:
1090 case BUILT_IN_POWF:
1091 case BUILT_IN_POW:
1092 base = gimple_call_arg (last_stmt, 0);
1093 exp = gimple_call_arg (last_stmt, 1);
1094 if (TREE_CODE (exp) != REAL_CST
1095 && TREE_CODE (exp) != INTEGER_CST)
1096 return NULL;
1097 break;
1099 default:
1100 return NULL;
1103 /* We now have a pow or powi builtin function call with a constant
1104 exponent. */
1106 *type_out = NULL_TREE;
1108 /* Catch squaring. */
1109 if ((tree_fits_shwi_p (exp)
1110 && tree_to_shwi (exp) == 2)
1111 || (TREE_CODE (exp) == REAL_CST
1112 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
1114 *type_in = TREE_TYPE (base);
1116 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1117 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1118 return stmt;
1121 /* Catch square root. */
1122 if (TREE_CODE (exp) == REAL_CST
1123 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
1125 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
1126 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1127 if (*type_in)
1129 gcall *stmt = gimple_build_call (newfn, 1, base);
1130 if (vectorizable_function (stmt, *type_in, *type_in)
1131 != NULL_TREE)
1133 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1134 gimple_call_set_lhs (stmt, var);
1135 return stmt;
1140 return NULL;
1144 /* Function vect_recog_widen_sum_pattern
1146 Try to find the following pattern:
1148 type x_t;
1149 TYPE x_T, sum = init;
1150 loop:
1151 sum_0 = phi <init, sum_1>
1152 S1 x_t = *p;
1153 S2 x_T = (TYPE) x_t;
1154 S3 sum_1 = x_T + sum_0;
1156 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1157 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1158 a special case of a reduction computation.
1160 Input:
1162 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1163 when this function is called with S3, the pattern {S2,S3} will be detected.
1165 Output:
1167 * TYPE_IN: The type of the input arguments to the pattern.
1169 * TYPE_OUT: The type of the output of this pattern.
1171 * Return value: A new stmt that will be used to replace the sequence of
1172 stmts that constitute the pattern. In this case it will be:
1173 WIDEN_SUM <x_t, sum_0>
1175 Note: The widening-sum idiom is a widening reduction pattern that is
1176 vectorized without preserving all the intermediate results. It
1177 produces only N/2 (widened) results (by summing up pairs of
1178 intermediate results) rather than all N results. Therefore, we
1179 cannot allow this pattern when we want to get all the results and in
1180 the correct order (as is the case when this computation is in an
1181 inner-loop nested in an outer-loop that is being vectorized). */
1183 static gimple
1184 vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
1185 tree *type_out)
1187 gimple stmt, last_stmt = (*stmts)[0];
1188 tree oprnd0, oprnd1;
1189 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1190 tree type, half_type;
1191 gimple pattern_stmt;
1192 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1193 struct loop *loop;
1194 tree var;
1195 bool promotion;
1197 if (!loop_info)
1198 return NULL;
1200 loop = LOOP_VINFO_LOOP (loop_info);
1202 /* We don't allow changing the order of the computation in the inner-loop
1203 when doing outer-loop vectorization. */
1204 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1205 return NULL;
1207 if (!is_gimple_assign (last_stmt))
1208 return NULL;
1210 type = gimple_expr_type (last_stmt);
1212 /* Look for the following pattern
1213 DX = (TYPE) X;
1214 sum_1 = DX + sum_0;
1215 In which DX is at least double the size of X, and sum_1 has been
1216 recognized as a reduction variable.
1219 /* Starting from LAST_STMT, follow the defs of its uses in search
1220 of the above pattern. */
1222 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1223 return NULL;
1225 oprnd0 = gimple_assign_rhs1 (last_stmt);
1226 oprnd1 = gimple_assign_rhs2 (last_stmt);
1227 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1228 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1229 return NULL;
1231 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1232 we know that oprnd1 is the reduction variable (defined by a loop-header
1233 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1234 Left to check that oprnd0 is defined by a cast from type 'type' to type
1235 'TYPE'. */
1237 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1238 &promotion)
1239 || !promotion)
1240 return NULL;
1242 oprnd0 = gimple_assign_rhs1 (stmt);
1243 *type_in = half_type;
1244 *type_out = type;
1246 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1247 var = vect_recog_temp_ssa_var (type, NULL);
1248 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1250 if (dump_enabled_p ())
1252 dump_printf_loc (MSG_NOTE, vect_location,
1253 "vect_recog_widen_sum_pattern: detected: ");
1254 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1255 dump_printf (MSG_NOTE, "\n");
1258 return pattern_stmt;
1262 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1264 Input:
1265 STMT - a statement to check.
1266 DEF - we support operations with two operands, one of which is constant.
1267 The other operand can be defined by a demotion operation, or by a
1268 previous statement in a sequence of over-promoted operations. In the
1269 latter case DEF is used to replace that operand. (It is defined by a
1270 pattern statement we created for the previous statement in the
1271 sequence).
1273 Input/output:
1274 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1275 NULL, it's the type of DEF.
1276 STMTS - additional pattern statements. If a pattern statement (type
1277 conversion) is created in this function, its original statement is
1278 added to STMTS.
1280 Output:
1281 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1282 operands to use in the new pattern statement for STMT (will be created
1283 in vect_recog_over_widening_pattern ()).
1284 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1285 statements for STMT: the first one is a type promotion and the second
1286 one is the operation itself. We return the type promotion statement
1287 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1288 the second pattern statement. */
1290 static bool
1291 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
1292 tree *op0, tree *op1, gimple *new_def_stmt,
1293 vec<gimple> *stmts)
1295 enum tree_code code;
1296 tree const_oprnd, oprnd;
1297 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1298 gimple def_stmt, new_stmt;
1299 bool first = false;
1300 bool promotion;
1302 *op0 = NULL_TREE;
1303 *op1 = NULL_TREE;
1304 *new_def_stmt = NULL;
1306 if (!is_gimple_assign (stmt))
1307 return false;
1309 code = gimple_assign_rhs_code (stmt);
1310 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1311 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1312 return false;
1314 oprnd = gimple_assign_rhs1 (stmt);
1315 const_oprnd = gimple_assign_rhs2 (stmt);
1316 type = gimple_expr_type (stmt);
1318 if (TREE_CODE (oprnd) != SSA_NAME
1319 || TREE_CODE (const_oprnd) != INTEGER_CST)
1320 return false;
1322 /* If oprnd has other uses besides that in stmt we cannot mark it
1323 as being part of a pattern only. */
1324 if (!has_single_use (oprnd))
1325 return false;
1327 /* If we are in the middle of a sequence, we use DEF from a previous
1328 statement. Otherwise, OPRND has to be a result of type promotion. */
1329 if (*new_type)
1331 half_type = *new_type;
1332 oprnd = def;
1334 else
1336 first = true;
1337 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1338 &promotion)
1339 || !promotion
1340 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1341 return false;
1344 /* Can we perform the operation on a smaller type? */
1345 switch (code)
1347 case BIT_IOR_EXPR:
1348 case BIT_XOR_EXPR:
1349 case BIT_AND_EXPR:
1350 if (!int_fits_type_p (const_oprnd, half_type))
1352 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1353 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1354 return false;
1356 interm_type = build_nonstandard_integer_type (
1357 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1358 if (!int_fits_type_p (const_oprnd, interm_type))
1359 return false;
1362 break;
1364 case LSHIFT_EXPR:
1365 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1366 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1367 return false;
1369 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1370 (e.g., if the original value was char, the shift amount is at most 8
1371 if we want to use short). */
1372 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1373 return false;
1375 interm_type = build_nonstandard_integer_type (
1376 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1378 if (!vect_supportable_shift (code, interm_type))
1379 return false;
1381 break;
1383 case RSHIFT_EXPR:
1384 if (vect_supportable_shift (code, half_type))
1385 break;
1387 /* Try intermediate type - HALF_TYPE is not supported. */
1388 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1389 return false;
1391 interm_type = build_nonstandard_integer_type (
1392 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1394 if (!vect_supportable_shift (code, interm_type))
1395 return false;
1397 break;
1399 default:
1400 gcc_unreachable ();
1403 /* There are four possible cases:
1404 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1405 the first statement in the sequence)
1406 a. The original, HALF_TYPE, is not enough - we replace the promotion
1407 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1408 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1409 promotion.
1410 2. OPRND is defined by a pattern statement we created.
1411 a. Its type is not sufficient for the operation, we create a new stmt:
1412 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1413 this statement in NEW_DEF_STMT, and it is later put in
1414 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1415 b. OPRND is good to use in the new statement. */
1416 if (first)
1418 if (interm_type)
1420 /* Replace the original type conversion HALF_TYPE->TYPE with
1421 HALF_TYPE->INTERM_TYPE. */
1422 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1424 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1425 /* Check if the already created pattern stmt is what we need. */
1426 if (!is_gimple_assign (new_stmt)
1427 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1428 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1429 return false;
1431 stmts->safe_push (def_stmt);
1432 oprnd = gimple_assign_lhs (new_stmt);
1434 else
1436 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1437 oprnd = gimple_assign_rhs1 (def_stmt);
1438 new_oprnd = make_ssa_name (interm_type);
1439 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1440 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1441 stmts->safe_push (def_stmt);
1442 oprnd = new_oprnd;
1445 else
1447 /* Retrieve the operand before the type promotion. */
1448 oprnd = gimple_assign_rhs1 (def_stmt);
1451 else
1453 if (interm_type)
1455 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1456 new_oprnd = make_ssa_name (interm_type);
1457 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1458 oprnd = new_oprnd;
1459 *new_def_stmt = new_stmt;
1462 /* Otherwise, OPRND is already set. */
1465 if (interm_type)
1466 *new_type = interm_type;
1467 else
1468 *new_type = half_type;
1470 *op0 = oprnd;
1471 *op1 = fold_convert (*new_type, const_oprnd);
1473 return true;
1477 /* Try to find a statement or a sequence of statements that can be performed
1478 on a smaller type:
1480 type x_t;
1481 TYPE x_T, res0_T, res1_T;
1482 loop:
1483 S1 x_t = *p;
1484 S2 x_T = (TYPE) x_t;
1485 S3 res0_T = op (x_T, C0);
1486 S4 res1_T = op (res0_T, C1);
1487 S5 ... = () res1_T; - type demotion
1489 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1490 constants.
1491 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1492 be 'type' or some intermediate type. For now, we expect S5 to be a type
1493 demotion operation. We also check that S3 and S4 have only one use. */
1495 static gimple
1496 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1497 tree *type_in, tree *type_out)
1499 gimple stmt = stmts->pop ();
1500 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1501 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1502 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1503 bool first;
1504 tree type = NULL;
1506 first = true;
1507 while (1)
1509 if (!vinfo_for_stmt (stmt)
1510 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1511 return NULL;
1513 new_def_stmt = NULL;
1514 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1515 &op0, &op1, &new_def_stmt,
1516 stmts))
1518 if (first)
1519 return NULL;
1520 else
1521 break;
1524 /* STMT can be performed on a smaller type. Check its uses. */
1525 use_stmt = vect_single_imm_use (stmt);
1526 if (!use_stmt || !is_gimple_assign (use_stmt))
1527 return NULL;
1529 /* Create pattern statement for STMT. */
1530 vectype = get_vectype_for_scalar_type (new_type);
1531 if (!vectype)
1532 return NULL;
1534 /* We want to collect all the statements for which we create pattern
1535 statetments, except for the case when the last statement in the
1536 sequence doesn't have a corresponding pattern statement. In such
1537 case we associate the last pattern statement with the last statement
1538 in the sequence. Therefore, we only add the original statement to
1539 the list if we know that it is not the last. */
1540 if (prev_stmt)
1541 stmts->safe_push (prev_stmt);
1543 var = vect_recog_temp_ssa_var (new_type, NULL);
1544 pattern_stmt
1545 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1546 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1547 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1549 if (dump_enabled_p ())
1551 dump_printf_loc (MSG_NOTE, vect_location,
1552 "created pattern stmt: ");
1553 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1554 dump_printf (MSG_NOTE, "\n");
1557 type = gimple_expr_type (stmt);
1558 prev_stmt = stmt;
1559 stmt = use_stmt;
1561 first = false;
1564 /* We got a sequence. We expect it to end with a type demotion operation.
1565 Otherwise, we quit (for now). There are three possible cases: the
1566 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1567 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1568 NEW_TYPE differs (we create a new conversion statement). */
1569 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1571 use_lhs = gimple_assign_lhs (use_stmt);
1572 use_type = TREE_TYPE (use_lhs);
1573 /* Support only type demotion or signedess change. */
1574 if (!INTEGRAL_TYPE_P (use_type)
1575 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1576 return NULL;
1578 /* Check that NEW_TYPE is not bigger than the conversion result. */
1579 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1580 return NULL;
1582 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1583 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1585 /* Create NEW_TYPE->USE_TYPE conversion. */
1586 new_oprnd = make_ssa_name (use_type);
1587 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1588 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1590 *type_in = get_vectype_for_scalar_type (new_type);
1591 *type_out = get_vectype_for_scalar_type (use_type);
1593 /* We created a pattern statement for the last statement in the
1594 sequence, so we don't need to associate it with the pattern
1595 statement created for PREV_STMT. Therefore, we add PREV_STMT
1596 to the list in order to mark it later in vect_pattern_recog_1. */
1597 if (prev_stmt)
1598 stmts->safe_push (prev_stmt);
1600 else
1602 if (prev_stmt)
1603 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1604 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1606 *type_in = vectype;
1607 *type_out = NULL_TREE;
1610 stmts->safe_push (use_stmt);
1612 else
1613 /* TODO: support general case, create a conversion to the correct type. */
1614 return NULL;
1616 /* Pattern detected. */
1617 if (dump_enabled_p ())
1619 dump_printf_loc (MSG_NOTE, vect_location,
1620 "vect_recog_over_widening_pattern: detected: ");
1621 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1622 dump_printf (MSG_NOTE, "\n");
1625 return pattern_stmt;
1628 /* Detect widening shift pattern:
1630 type a_t;
1631 TYPE a_T, res_T;
1633 S1 a_t = ;
1634 S2 a_T = (TYPE) a_t;
1635 S3 res_T = a_T << CONST;
1637 where type 'TYPE' is at least double the size of type 'type'.
1639 Also detect cases where the shift result is immediately converted
1640 to another type 'result_type' that is no larger in size than 'TYPE'.
1641 In those cases we perform a widen-shift that directly results in
1642 'result_type', to avoid a possible over-widening situation:
1644 type a_t;
1645 TYPE a_T, res_T;
1646 result_type res_result;
1648 S1 a_t = ;
1649 S2 a_T = (TYPE) a_t;
1650 S3 res_T = a_T << CONST;
1651 S4 res_result = (result_type) res_T;
1652 '--> res_result' = a_t w<< CONST;
1654 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1655 create an additional pattern stmt for S2 to create a variable of an
1656 intermediate type, and perform widen-shift on the intermediate type:
1658 type a_t;
1659 interm_type a_it;
1660 TYPE a_T, res_T, res_T';
1662 S1 a_t = ;
1663 S2 a_T = (TYPE) a_t;
1664 '--> a_it = (interm_type) a_t;
1665 S3 res_T = a_T << CONST;
1666 '--> res_T' = a_it <<* CONST;
1668 Input/Output:
1670 * STMTS: Contains a stmt from which the pattern search begins.
1671 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1672 in STMTS. When an intermediate type is used and a pattern statement is
1673 created for S2, we also put S2 here (before S3).
1675 Output:
1677 * TYPE_IN: The type of the input arguments to the pattern.
1679 * TYPE_OUT: The type of the output of this pattern.
1681 * Return value: A new stmt that will be used to replace the sequence of
1682 stmts that constitute the pattern. In this case it will be:
1683 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1685 static gimple
1686 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1687 tree *type_in, tree *type_out)
1689 gimple last_stmt = stmts->pop ();
1690 gimple def_stmt0;
1691 tree oprnd0, oprnd1;
1692 tree type, half_type0;
1693 gimple pattern_stmt;
1694 tree vectype, vectype_out = NULL_TREE;
1695 tree var;
1696 enum tree_code dummy_code;
1697 int dummy_int;
1698 vec<tree> dummy_vec;
1699 gimple use_stmt;
1700 bool promotion;
1702 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1703 return NULL;
1705 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1706 return NULL;
1708 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1709 return NULL;
1711 oprnd0 = gimple_assign_rhs1 (last_stmt);
1712 oprnd1 = gimple_assign_rhs2 (last_stmt);
1713 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1714 return NULL;
1716 /* Check operand 0: it has to be defined by a type promotion. */
1717 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1718 &promotion)
1719 || !promotion)
1720 return NULL;
1722 /* Check operand 1: has to be positive. We check that it fits the type
1723 in vect_handle_widen_op_by_const (). */
1724 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1725 return NULL;
1727 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1728 type = gimple_expr_type (last_stmt);
1730 /* Check for subsequent conversion to another type. */
1731 use_stmt = vect_single_imm_use (last_stmt);
1732 if (use_stmt && is_gimple_assign (use_stmt)
1733 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1734 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1736 tree use_lhs = gimple_assign_lhs (use_stmt);
1737 tree use_type = TREE_TYPE (use_lhs);
1739 if (INTEGRAL_TYPE_P (use_type)
1740 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1742 last_stmt = use_stmt;
1743 type = use_type;
1747 /* Check if this a widening operation. */
1748 gimple wstmt = NULL;
1749 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1750 &oprnd0, &wstmt,
1751 type, &half_type0, def_stmt0))
1752 return NULL;
1754 /* Pattern detected. */
1755 if (dump_enabled_p ())
1756 dump_printf_loc (MSG_NOTE, vect_location,
1757 "vect_recog_widen_shift_pattern: detected:\n");
1759 /* Check target support. */
1760 vectype = get_vectype_for_scalar_type (half_type0);
1761 vectype_out = get_vectype_for_scalar_type (type);
1763 if (!vectype
1764 || !vectype_out
1765 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1766 vectype_out, vectype,
1767 &dummy_code, &dummy_code,
1768 &dummy_int, &dummy_vec))
1769 return NULL;
1771 *type_in = vectype;
1772 *type_out = vectype_out;
1774 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1775 var = vect_recog_temp_ssa_var (type, NULL);
1776 pattern_stmt =
1777 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1778 if (wstmt)
1780 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1781 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1782 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1783 new_pattern_def_seq (stmt_vinfo, wstmt);
1784 stmt_vec_info new_stmt_info
1785 = new_stmt_vec_info (wstmt, loop_vinfo, bb_vinfo);
1786 set_vinfo_for_stmt (wstmt, new_stmt_info);
1787 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1790 if (dump_enabled_p ())
1791 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1793 stmts->safe_push (last_stmt);
1794 return pattern_stmt;
1797 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1799 type a_t, b_t, c_t;
1801 S0 a_t = b_t r<< c_t;
1803 Input/Output:
1805 * STMTS: Contains a stmt from which the pattern search begins,
1806 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1807 with a sequence:
1809 S1 d_t = -c_t;
1810 S2 e_t = d_t & (B - 1);
1811 S3 f_t = b_t << c_t;
1812 S4 g_t = b_t >> e_t;
1813 S0 a_t = f_t | g_t;
1815 where B is element bitsize of type.
1817 Output:
1819 * TYPE_IN: The type of the input arguments to the pattern.
1821 * TYPE_OUT: The type of the output of this pattern.
1823 * Return value: A new stmt that will be used to replace the rotate
1824 S0 stmt. */
1826 static gimple
1827 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1829 gimple last_stmt = stmts->pop ();
1830 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1831 gimple pattern_stmt, def_stmt;
1832 enum tree_code rhs_code;
1833 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1834 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1835 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1836 enum vect_def_type dt;
1837 optab optab1, optab2;
1838 edge ext_def = NULL;
1840 if (!is_gimple_assign (last_stmt))
1841 return NULL;
1843 rhs_code = gimple_assign_rhs_code (last_stmt);
1844 switch (rhs_code)
1846 case LROTATE_EXPR:
1847 case RROTATE_EXPR:
1848 break;
1849 default:
1850 return NULL;
1853 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1854 return NULL;
1856 lhs = gimple_assign_lhs (last_stmt);
1857 oprnd0 = gimple_assign_rhs1 (last_stmt);
1858 type = TREE_TYPE (oprnd0);
1859 oprnd1 = gimple_assign_rhs2 (last_stmt);
1860 if (TREE_CODE (oprnd0) != SSA_NAME
1861 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1862 || !INTEGRAL_TYPE_P (type)
1863 || !TYPE_UNSIGNED (type))
1864 return NULL;
1866 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1867 &def, &dt))
1868 return NULL;
1870 if (dt != vect_internal_def
1871 && dt != vect_constant_def
1872 && dt != vect_external_def)
1873 return NULL;
1875 vectype = get_vectype_for_scalar_type (type);
1876 if (vectype == NULL_TREE)
1877 return NULL;
1879 /* If vector/vector or vector/scalar rotate is supported by the target,
1880 don't do anything here. */
1881 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1882 if (optab1
1883 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1884 return NULL;
1886 if (bb_vinfo != NULL || dt != vect_internal_def)
1888 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1889 if (optab2
1890 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1891 return NULL;
1894 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1895 don't do anything here either. */
1896 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1897 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1898 if (!optab1
1899 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1900 || !optab2
1901 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1903 if (bb_vinfo == NULL && dt == vect_internal_def)
1904 return NULL;
1905 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1906 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1907 if (!optab1
1908 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1909 || !optab2
1910 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1911 return NULL;
1914 *type_in = vectype;
1915 *type_out = vectype;
1916 if (*type_in == NULL_TREE)
1917 return NULL;
1919 if (dt == vect_external_def
1920 && TREE_CODE (oprnd1) == SSA_NAME
1921 && loop_vinfo)
1923 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1924 ext_def = loop_preheader_edge (loop);
1925 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1927 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1928 if (bb == NULL
1929 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1930 ext_def = NULL;
1934 def = NULL_TREE;
1935 if (TREE_CODE (oprnd1) == INTEGER_CST
1936 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1937 def = oprnd1;
1938 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1940 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1941 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1942 && TYPE_PRECISION (TREE_TYPE (rhs1))
1943 == TYPE_PRECISION (type))
1944 def = rhs1;
1947 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1948 if (def == NULL_TREE)
1950 def = vect_recog_temp_ssa_var (type, NULL);
1951 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1952 if (ext_def)
1954 basic_block new_bb
1955 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1956 gcc_assert (!new_bb);
1958 else
1959 append_pattern_def_seq (stmt_vinfo, def_stmt);
1961 stype = TREE_TYPE (def);
1963 if (TREE_CODE (def) == INTEGER_CST)
1965 if (!tree_fits_uhwi_p (def)
1966 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1967 || integer_zerop (def))
1968 return NULL;
1969 def2 = build_int_cst (stype,
1970 GET_MODE_PRECISION (TYPE_MODE (type))
1971 - tree_to_uhwi (def));
1973 else
1975 tree vecstype = get_vectype_for_scalar_type (stype);
1976 stmt_vec_info def_stmt_vinfo;
1978 if (vecstype == NULL_TREE)
1979 return NULL;
1980 def2 = vect_recog_temp_ssa_var (stype, NULL);
1981 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1982 if (ext_def)
1984 basic_block new_bb
1985 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1986 gcc_assert (!new_bb);
1988 else
1990 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1991 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1992 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1993 append_pattern_def_seq (stmt_vinfo, def_stmt);
1996 def2 = vect_recog_temp_ssa_var (stype, NULL);
1997 tree mask
1998 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1999 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2000 gimple_assign_lhs (def_stmt), mask);
2001 if (ext_def)
2003 basic_block new_bb
2004 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2005 gcc_assert (!new_bb);
2007 else
2009 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2010 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2011 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2012 append_pattern_def_seq (stmt_vinfo, def_stmt);
2016 var1 = vect_recog_temp_ssa_var (type, NULL);
2017 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2018 ? LSHIFT_EXPR : RSHIFT_EXPR,
2019 oprnd0, def);
2020 append_pattern_def_seq (stmt_vinfo, def_stmt);
2022 var2 = vect_recog_temp_ssa_var (type, NULL);
2023 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2024 ? RSHIFT_EXPR : LSHIFT_EXPR,
2025 oprnd0, def2);
2026 append_pattern_def_seq (stmt_vinfo, def_stmt);
2028 /* Pattern detected. */
2029 if (dump_enabled_p ())
2030 dump_printf_loc (MSG_NOTE, vect_location,
2031 "vect_recog_rotate_pattern: detected:\n");
2033 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2034 var = vect_recog_temp_ssa_var (type, NULL);
2035 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2037 if (dump_enabled_p ())
2038 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2040 stmts->safe_push (last_stmt);
2041 return pattern_stmt;
2044 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2045 vectorized:
2047 type a_t;
2048 TYPE b_T, res_T;
2050 S1 a_t = ;
2051 S2 b_T = ;
2052 S3 res_T = b_T op a_t;
2054 where type 'TYPE' is a type with different size than 'type',
2055 and op is <<, >> or rotate.
2057 Also detect cases:
2059 type a_t;
2060 TYPE b_T, c_T, res_T;
2062 S0 c_T = ;
2063 S1 a_t = (type) c_T;
2064 S2 b_T = ;
2065 S3 res_T = b_T op a_t;
2067 Input/Output:
2069 * STMTS: Contains a stmt from which the pattern search begins,
2070 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2071 with a shift/rotate which has same type on both operands, in the
2072 second case just b_T op c_T, in the first case with added cast
2073 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2075 Output:
2077 * TYPE_IN: The type of the input arguments to the pattern.
2079 * TYPE_OUT: The type of the output of this pattern.
2081 * Return value: A new stmt that will be used to replace the shift/rotate
2082 S3 stmt. */
2084 static gimple
2085 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
2086 tree *type_in, tree *type_out)
2088 gimple last_stmt = stmts->pop ();
2089 tree oprnd0, oprnd1, lhs, var;
2090 gimple pattern_stmt, def_stmt;
2091 enum tree_code rhs_code;
2092 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2093 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2094 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2095 enum vect_def_type dt;
2096 tree def;
/* The pattern root must be a shift or rotate assignment.  */
2098 if (!is_gimple_assign (last_stmt))
2099 return NULL;
2101 rhs_code = gimple_assign_rhs_code (last_stmt);
2102 switch (rhs_code)
2104 case LSHIFT_EXPR:
2105 case RSHIFT_EXPR:
2106 case LROTATE_EXPR:
2107 case RROTATE_EXPR:
2108 break;
2109 default:
2110 return NULL;
/* Don't recurse into a stmt that is already part of another pattern.  */
2113 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2114 return NULL;
2116 lhs = gimple_assign_lhs (last_stmt);
2117 oprnd0 = gimple_assign_rhs1 (last_stmt);
2118 oprnd1 = gimple_assign_rhs2 (last_stmt);
/* Require SSA operands whose modes differ (otherwise there is nothing
   to fix up), a shift-count type whose precision exactly fills its
   mode, and a result precision matching the shifted operand's.  */
2119 if (TREE_CODE (oprnd0) != SSA_NAME
2120 || TREE_CODE (oprnd1) != SSA_NAME
2121 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2122 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2123 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2124 || TYPE_PRECISION (TREE_TYPE (lhs))
2125 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2126 return NULL;
/* The shift count must be defined inside the region being vectorized.  */
2128 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
2129 &def, &dt))
2130 return NULL;
2132 if (dt != vect_internal_def)
2133 return NULL;
2135 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2136 *type_out = *type_in;
2137 if (*type_in == NULL_TREE)
2138 return NULL;
/* If the shift count is itself a cast from a value that already has the
   same mode and precision as OPRND0, reuse that value directly;
   otherwise emit a cast of the count into the pattern def sequence.  */
2140 def = NULL_TREE;
2141 if (gimple_assign_cast_p (def_stmt))
2143 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2144 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2145 && TYPE_PRECISION (TREE_TYPE (rhs1))
2146 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2147 def = rhs1;
2150 if (def == NULL_TREE)
2152 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2153 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2154 new_pattern_def_seq (stmt_vinfo, def_stmt);
2157 /* Pattern detected. */
2158 if (dump_enabled_p ())
2159 dump_printf_loc (MSG_NOTE, vect_location,
2160 "vect_recog_vector_vector_shift_pattern: detected:\n");
2162 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2163 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2164 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2166 if (dump_enabled_p ())
2167 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2169 stmts->safe_push (last_stmt);
2170 return pattern_stmt;
2173 /* Detect a signed division by a constant that wouldn't be
2174 otherwise vectorized:
2176 type a_t, b_t;
2178 S1 a_t = b_t / N;
2180 where type 'type' is an integral type and N is a constant.
2182 Similarly handle modulo by a constant:
2184 S4 a_t = b_t % N;
2186 Input/Output:
2188 * STMTS: Contains a stmt from which the pattern search begins,
2189 i.e. the division stmt. S1 is replaced by if N is a power
2190 of two constant and type is signed:
2191 S3 y_t = b_t < 0 ? N - 1 : 0;
2192 S2 x_t = b_t + y_t;
2193 S1' a_t = x_t >> log2 (N);
2195 S4 is replaced if N is a power of two constant and
2196 type is signed by (where *_T temporaries have unsigned type):
2197 S9 y_T = b_t < 0 ? -1U : 0U;
2198 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2199 S7 z_t = (type) z_T;
2200 S6 w_t = b_t + z_t;
2201 S5 x_t = w_t & (N - 1);
2202 S4' a_t = x_t - z_t;
2204 Output:
2206 * TYPE_IN: The type of the input arguments to the pattern.
2208 * TYPE_OUT: The type of the output of this pattern.
2210 * Return value: A new stmt that will be used to replace the division
2211 S1 or modulo S4 stmt. */
2213 static gimple
2214 vect_recog_divmod_pattern (vec<gimple> *stmts,
2215 tree *type_in, tree *type_out)
2217 gimple last_stmt = stmts->pop ();
2218 tree oprnd0, oprnd1, vectype, itype, cond;
2219 gimple pattern_stmt, def_stmt;
2220 enum tree_code rhs_code;
2221 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2222 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2223 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2224 optab optab;
2225 tree q;
2226 int dummy_int, prec;
2227 stmt_vec_info def_stmt_vinfo;
/* Only truncating division/modulo assignments are candidates.  */
2229 if (!is_gimple_assign (last_stmt))
2230 return NULL;
2232 rhs_code = gimple_assign_rhs_code (last_stmt);
2233 switch (rhs_code)
2235 case TRUNC_DIV_EXPR:
2236 case TRUNC_MOD_EXPR:
2237 break;
2238 default:
2239 return NULL;
2242 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2243 return NULL;
/* Need an SSA dividend, a constant divisor and an integer type whose
   precision exactly fills its mode.  */
2245 oprnd0 = gimple_assign_rhs1 (last_stmt);
2246 oprnd1 = gimple_assign_rhs2 (last_stmt);
2247 itype = TREE_TYPE (oprnd0);
2248 if (TREE_CODE (oprnd0) != SSA_NAME
2249 || TREE_CODE (oprnd1) != INTEGER_CST
2250 || TREE_CODE (itype) != INTEGER_TYPE
2251 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2252 return NULL;
2254 vectype = get_vectype_for_scalar_type (itype);
2255 if (vectype == NULL_TREE)
2256 return NULL;
2258 /* If the target can handle vectorized division or modulo natively,
2259 don't attempt to optimize this. */
2260 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2261 if (optab != unknown_optab)
2263 machine_mode vec_mode = TYPE_MODE (vectype);
2264 int icode = (int) optab_handler (optab, vec_mode);
2265 if (icode != CODE_FOR_nothing)
2266 return NULL;
/* Case 1: signed division/modulo by a positive power of two — open-code
   it with a conditional bias plus shift/mask (see S1'-S9' in the
   function comment above).  Unsigned pow2 is left to generic folding.  */
2269 prec = TYPE_PRECISION (itype);
2270 if (integer_pow2p (oprnd1))
2272 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2273 return NULL;
2275 /* Pattern detected. */
2276 if (dump_enabled_p ())
2277 dump_printf_loc (MSG_NOTE, vect_location,
2278 "vect_recog_divmod_pattern: detected:\n");
/* COND is "oprnd0 < 0", used to bias negative dividends.  */
2280 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2281 build_int_cst (itype, 0));
2282 if (rhs_code == TRUNC_DIV_EXPR)
2284 tree var = vect_recog_temp_ssa_var (itype, NULL);
2285 tree shift;
2286 def_stmt
2287 = gimple_build_assign (var, COND_EXPR, cond,
2288 fold_build2 (MINUS_EXPR, itype, oprnd1,
2289 build_int_cst (itype, 1)),
2290 build_int_cst (itype, 0));
2291 new_pattern_def_seq (stmt_vinfo, def_stmt);
2292 var = vect_recog_temp_ssa_var (itype, NULL);
2293 def_stmt
2294 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2295 gimple_assign_lhs (def_stmt));
2296 append_pattern_def_seq (stmt_vinfo, def_stmt);
2298 shift = build_int_cst (itype, tree_log2 (oprnd1));
2299 pattern_stmt
2300 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2301 RSHIFT_EXPR, var, shift);
2303 else
2305 tree signmask;
2306 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
/* Modulo 2 needs only a 0/1 sign correction; larger powers of two
   build the sign mask in an unsigned copy of the type to keep the
   right shift logical.  */
2307 if (compare_tree_int (oprnd1, 2) == 0)
2309 signmask = vect_recog_temp_ssa_var (itype, NULL);
2310 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2311 build_int_cst (itype, 1),
2312 build_int_cst (itype, 0));
2313 append_pattern_def_seq (stmt_vinfo, def_stmt);
2315 else
2317 tree utype
2318 = build_nonstandard_integer_type (prec, 1);
2319 tree vecutype = get_vectype_for_scalar_type (utype);
2320 tree shift
2321 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2322 - tree_log2 (oprnd1));
2323 tree var = vect_recog_temp_ssa_var (utype, NULL);
2325 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2326 build_int_cst (utype, -1),
2327 build_int_cst (utype, 0));
/* These UTYPE stmts need their own stmt_vec_info carrying VECUTYPE
   since it differs from the pattern's main vectype.  */
2328 def_stmt_vinfo
2329 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2330 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2331 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2332 append_pattern_def_seq (stmt_vinfo, def_stmt);
2333 var = vect_recog_temp_ssa_var (utype, NULL);
2334 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2335 gimple_assign_lhs (def_stmt),
2336 shift);
2337 def_stmt_vinfo
2338 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2339 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2340 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2341 append_pattern_def_seq (stmt_vinfo, def_stmt);
2342 signmask = vect_recog_temp_ssa_var (itype, NULL);
2343 def_stmt
2344 = gimple_build_assign (signmask, NOP_EXPR, var);
2345 append_pattern_def_seq (stmt_vinfo, def_stmt);
2347 def_stmt
2348 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2349 PLUS_EXPR, oprnd0, signmask);
2350 append_pattern_def_seq (stmt_vinfo, def_stmt);
2351 def_stmt
2352 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2353 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2354 fold_build2 (MINUS_EXPR, itype, oprnd1,
2355 build_int_cst (itype, 1)));
2356 append_pattern_def_seq (stmt_vinfo, def_stmt);
2358 pattern_stmt
2359 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2360 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2361 signmask);
2364 if (dump_enabled_p ())
2365 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2368 stmts->safe_push (last_stmt);
2370 *type_in = vectype;
2371 *type_out = vectype;
2372 return pattern_stmt;
/* Case 2: general constant divisor via highpart multiply.  Needs the
   multiplier to fit in a HOST_WIDE_INT and a target-supported vector
   highpart multiply; division by zero is left alone.  */
2375 if (prec > HOST_BITS_PER_WIDE_INT
2376 || integer_zerop (oprnd1))
2377 return NULL;
2379 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2380 return NULL;
2382 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2384 if (TYPE_UNSIGNED (itype))
2386 unsigned HOST_WIDE_INT mh, ml;
2387 int pre_shift, post_shift;
2388 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2389 & GET_MODE_MASK (TYPE_MODE (itype)));
2390 tree t1, t2, t3, t4;
2392 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2393 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2394 return NULL;
2396 /* Find a suitable multiplier and right shift count
2397 instead of multiplying with D. */
2398 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2400 /* If the suggested multiplier is more than SIZE bits, we can do better
2401 for even divisors, using an initial right shift. */
2402 if (mh != 0 && (d & 1) == 0)
2404 pre_shift = floor_log2 (d & -d);
2405 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2406 &ml, &post_shift, &dummy_int);
2407 gcc_assert (!mh);
2409 else
2410 pre_shift = 0;
2412 if (mh != 0)
2414 if (post_shift - 1 >= prec)
2415 return NULL;
2417 /* t1 = oprnd0 h* ml;
2418 t2 = oprnd0 - t1;
2419 t3 = t2 >> 1;
2420 t4 = t1 + t3;
2421 q = t4 >> (post_shift - 1); */
2422 t1 = vect_recog_temp_ssa_var (itype, NULL);
2423 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2424 build_int_cst (itype, ml));
2425 append_pattern_def_seq (stmt_vinfo, def_stmt);
2427 t2 = vect_recog_temp_ssa_var (itype, NULL);
2428 def_stmt
2429 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2430 append_pattern_def_seq (stmt_vinfo, def_stmt);
2432 t3 = vect_recog_temp_ssa_var (itype, NULL);
2433 def_stmt
2434 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2435 append_pattern_def_seq (stmt_vinfo, def_stmt);
2437 t4 = vect_recog_temp_ssa_var (itype, NULL);
2438 def_stmt
2439 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2441 if (post_shift != 1)
2443 append_pattern_def_seq (stmt_vinfo, def_stmt);
2445 q = vect_recog_temp_ssa_var (itype, NULL);
2446 pattern_stmt
2447 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2448 build_int_cst (itype, post_shift - 1));
2450 else
2452 q = t4;
2453 pattern_stmt = def_stmt;
2456 else
2458 if (pre_shift >= prec || post_shift >= prec)
2459 return NULL;
2461 /* t1 = oprnd0 >> pre_shift;
2462 t2 = t1 h* ml;
2463 q = t2 >> post_shift; */
2464 if (pre_shift)
2466 t1 = vect_recog_temp_ssa_var (itype, NULL);
2467 def_stmt
2468 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2469 build_int_cst (NULL, pre_shift));
2470 append_pattern_def_seq (stmt_vinfo, def_stmt);
2472 else
2473 t1 = oprnd0;
2475 t2 = vect_recog_temp_ssa_var (itype, NULL);
2476 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2477 build_int_cst (itype, ml));
2479 if (post_shift)
2481 append_pattern_def_seq (stmt_vinfo, def_stmt);
2483 q = vect_recog_temp_ssa_var (itype, NULL);
2484 def_stmt
2485 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2486 build_int_cst (itype, post_shift));
2488 else
2489 q = t2;
2491 pattern_stmt = def_stmt;
2494 else
2496 unsigned HOST_WIDE_INT ml;
2497 int post_shift;
2498 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2499 unsigned HOST_WIDE_INT abs_d;
2500 bool add = false;
2501 tree t1, t2, t3, t4;
2503 /* Give up for -1. */
2504 if (d == -1)
2505 return NULL;
2507 /* Since d might be INT_MIN, we have to cast to
2508 unsigned HOST_WIDE_INT before negating to avoid
2509 undefined signed overflow. */
2510 abs_d = (d >= 0
2511 ? (unsigned HOST_WIDE_INT) d
2512 : - (unsigned HOST_WIDE_INT) d);
2514 /* n rem d = n rem -d */
2515 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2517 d = abs_d;
2518 oprnd1 = build_int_cst (itype, abs_d);
2520 else if (HOST_BITS_PER_WIDE_INT >= prec
2521 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2522 /* This case is not handled correctly below. */
2523 return NULL;
2525 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2526 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2528 add = true;
2529 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2531 if (post_shift >= prec)
2532 return NULL;
2534 /* t1 = oprnd0 h* ml; */
2535 t1 = vect_recog_temp_ssa_var (itype, NULL);
2536 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2537 build_int_cst (itype, ml));
2539 if (add)
2541 /* t2 = t1 + oprnd0; */
2542 append_pattern_def_seq (stmt_vinfo, def_stmt);
2543 t2 = vect_recog_temp_ssa_var (itype, NULL);
2544 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2546 else
2547 t2 = t1;
2549 if (post_shift)
2551 /* t3 = t2 >> post_shift; */
2552 append_pattern_def_seq (stmt_vinfo, def_stmt);
2553 t3 = vect_recog_temp_ssa_var (itype, NULL);
2554 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2555 build_int_cst (itype, post_shift));
2557 else
2558 t3 = t2;
/* Use VRP range info on the dividend to constant-fold the sign
   term when its sign is known (msb: 0 nonnegative, -1 negative,
   1 unknown — compute at runtime).  */
2560 wide_int oprnd0_min, oprnd0_max;
2561 int msb = 1;
2562 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2564 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2565 msb = 0;
2566 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2567 msb = -1;
2570 if (msb == 0 && d >= 0)
2572 /* q = t3; */
2573 q = t3;
2574 pattern_stmt = def_stmt;
2576 else
2578 /* t4 = oprnd0 >> (prec - 1);
2579 or if we know from VRP that oprnd0 >= 0
2580 t4 = 0;
2581 or if we know from VRP that oprnd0 < 0
2582 t4 = -1; */
2583 append_pattern_def_seq (stmt_vinfo, def_stmt);
2584 t4 = vect_recog_temp_ssa_var (itype, NULL);
2585 if (msb != 1)
2586 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2587 build_int_cst (itype, msb));
2588 else
2589 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2590 build_int_cst (itype, prec - 1));
2591 append_pattern_def_seq (stmt_vinfo, def_stmt);
2593 /* q = t3 - t4; or q = t4 - t3; */
2594 q = vect_recog_temp_ssa_var (itype, NULL);
2595 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2596 d < 0 ? t3 : t4);
2600 if (rhs_code == TRUNC_MOD_EXPR)
2602 tree r, t1;
2604 /* We divided. Now finish by:
2605 t1 = q * oprnd1;
2606 r = oprnd0 - t1; */
2607 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2609 t1 = vect_recog_temp_ssa_var (itype, NULL);
2610 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2611 append_pattern_def_seq (stmt_vinfo, def_stmt);
2613 r = vect_recog_temp_ssa_var (itype, NULL);
2614 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2617 /* Pattern detected. */
2618 if (dump_enabled_p ())
2620 dump_printf_loc (MSG_NOTE, vect_location,
2621 "vect_recog_divmod_pattern: detected: ");
2622 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2623 dump_printf (MSG_NOTE, "\n");
2626 stmts->safe_push (last_stmt);
2628 *type_in = vectype;
2629 *type_out = vectype;
2630 return pattern_stmt;
2633 /* Function vect_recog_mixed_size_cond_pattern
2635 Try to find the following pattern:
2637 type x_t, y_t;
2638 TYPE a_T, b_T, c_T;
2639 loop:
2640 S1 a_T = x_t CMP y_t ? b_T : c_T;
2642 where type 'TYPE' is an integral type which has different size
2643 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2644 than 'type', the constants need to fit into an integer type
2645 with the same width as 'type') or results of conversion from 'type'.
2647 Input:
2649 * LAST_STMT: A stmt from which the pattern search begins.
2651 Output:
2653 * TYPE_IN: The type of the input arguments to the pattern.
2655 * TYPE_OUT: The type of the output of this pattern.
2657 * Return value: A new stmt that will be used to replace the pattern.
2658 Additionally a def_stmt is added.
2660 a_it = x_t CMP y_t ? b_it : c_it;
2661 a_T = (TYPE) a_it; */
2663 static gimple
2664 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2665 tree *type_out)
2667 gimple last_stmt = (*stmts)[0];
2668 tree cond_expr, then_clause, else_clause;
2669 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2670 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2671 machine_mode cmpmode;
2672 gimple pattern_stmt, def_stmt;
2673 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2674 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2675 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2676 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2677 bool promotion;
2678 tree comp_scalar_type;
/* The root must be an internal-def COND_EXPR assignment whose condition
   is an embedded comparison.  */
2680 if (!is_gimple_assign (last_stmt)
2681 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2682 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2683 return NULL;
2685 cond_expr = gimple_assign_rhs1 (last_stmt);
2686 then_clause = gimple_assign_rhs2 (last_stmt);
2687 else_clause = gimple_assign_rhs3 (last_stmt);
2689 if (!COMPARISON_CLASS_P (cond_expr))
2690 return NULL;
2692 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2693 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2694 if (comp_vectype == NULL_TREE)
2695 return NULL;
/* The result type must be integral and genuinely mismatched against the
   comparison's type; non-constant clauses additionally require an
   integral comparison type.  */
2697 type = gimple_expr_type (last_stmt);
2698 if (types_compatible_p (type, comp_scalar_type)
2699 || ((TREE_CODE (then_clause) != INTEGER_CST
2700 || TREE_CODE (else_clause) != INTEGER_CST)
2701 && !INTEGRAL_TYPE_P (comp_scalar_type))
2702 || !INTEGRAL_TYPE_P (type))
2703 return NULL;
/* Non-constant clauses must be conversions from some original type;
   look through them and remember that type in ITYPE.  */
2705 if ((TREE_CODE (then_clause) != INTEGER_CST
2706 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2707 &def_stmt0, &promotion))
2708 || (TREE_CODE (else_clause) != INTEGER_CST
2709 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2710 &def_stmt1, &promotion)))
2711 return NULL;
2713 if (orig_type0 && orig_type1
2714 && !types_compatible_p (orig_type0, orig_type1))
2715 return NULL;
2717 if (orig_type0)
2719 if (!types_compatible_p (orig_type0, comp_scalar_type))
2720 return NULL;
2721 then_clause = gimple_assign_rhs1 (def_stmt0);
2722 itype = orig_type0;
2725 if (orig_type1)
2727 if (!types_compatible_p (orig_type1, comp_scalar_type))
2728 return NULL;
2729 else_clause = gimple_assign_rhs1 (def_stmt1);
2730 itype = orig_type1;
2733 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
/* Nothing to do when result and comparison already have the same size.  */
2735 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2736 return NULL;
2738 vectype = get_vectype_for_scalar_type (type);
2739 if (vectype == NULL_TREE)
2740 return NULL;
/* If the target can do the mixed-size VEC_COND_EXPR directly, leave it.  */
2742 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2743 return NULL;
2745 if (itype == NULL_TREE)
2746 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2747 TYPE_UNSIGNED (type));
2749 if (itype == NULL_TREE
2750 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2751 return NULL;
2753 vecitype = get_vectype_for_scalar_type (itype);
2754 if (vecitype == NULL_TREE)
2755 return NULL;
/* The comparison-sized VEC_COND_EXPR must be supported instead.  */
2757 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2758 return NULL;
/* When narrowing into ITYPE, constant clauses must fit.  */
2760 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2762 if ((TREE_CODE (then_clause) == INTEGER_CST
2763 && !int_fits_type_p (then_clause, itype))
2764 || (TREE_CODE (else_clause) == INTEGER_CST
2765 && !int_fits_type_p (else_clause, itype)))
2766 return NULL;
/* Emit the COND_EXPR in the comparison-sized ITYPE, then convert the
   result to the original TYPE (a_it = ... ; a_T = (TYPE) a_it).  */
2769 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2770 COND_EXPR, unshare_expr (cond_expr),
2771 fold_convert (itype, then_clause),
2772 fold_convert (itype, else_clause));
2773 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2774 NOP_EXPR, gimple_assign_lhs (def_stmt));
2776 new_pattern_def_seq (stmt_vinfo, def_stmt);
2777 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2778 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2779 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2780 *type_in = vecitype;
2781 *type_out = vectype;
2783 if (dump_enabled_p ())
2784 dump_printf_loc (MSG_NOTE, vect_location,
2785 "vect_recog_mixed_size_cond_pattern: detected:\n");
2787 return pattern_stmt;
2791 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2792 true if bool VAR can be optimized that way. */
2794 static bool
2795 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2797 gimple def_stmt;
2798 enum vect_def_type dt;
2799 tree def, rhs1;
2800 enum tree_code rhs_code;
/* VAR must be defined by a single-use assignment inside the region
   being vectorized.  */
2802 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2803 &dt))
2804 return false;
2806 if (dt != vect_internal_def)
2807 return false;
2809 if (!is_gimple_assign (def_stmt))
2810 return false;
2812 if (!has_single_use (def))
2813 return false;
/* Recurse through the boolean expression: copies, bool-like casts,
   negation and binary bit ops are fine as long as all leaves are; the
   leaves themselves must be vectorizable comparisons.  */
2815 rhs1 = gimple_assign_rhs1 (def_stmt);
2816 rhs_code = gimple_assign_rhs_code (def_stmt);
2817 switch (rhs_code)
2819 case SSA_NAME:
2820 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2822 CASE_CONVERT:
/* Only conversions from a 1-bit unsigned or BOOLEAN_TYPE source keep
   the value boolean.  */
2823 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2824 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2825 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2826 return false;
2827 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2829 case BIT_NOT_EXPR:
2830 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2832 case BIT_AND_EXPR:
2833 case BIT_IOR_EXPR:
2834 case BIT_XOR_EXPR:
2835 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2836 return false;
2837 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2838 bb_vinfo);
2840 default:
2841 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2843 tree vecitype, comp_vectype;
2845 /* If the comparison can throw, then is_gimple_condexpr will be
2846 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2847 if (stmt_could_throw_p (def_stmt))
2848 return false;
2850 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2851 if (comp_vectype == NULL_TREE)
2852 return false;
/* Non-integer comparisons get mapped to a same-width unsigned
   integer result type; that vector type must exist too.  */
2854 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2856 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2857 tree itype
2858 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2859 vecitype = get_vectype_for_scalar_type (itype);
2860 if (vecitype == NULL_TREE)
2861 return false;
2863 else
2864 vecitype = comp_vectype;
2865 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2867 return false;
2872 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2873 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2874 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2876 static tree
2877 adjust_bool_pattern_cast (tree type, tree var)
2879 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2880 gimple cast_stmt, pattern_stmt;
/* The def stmt must not have accumulated a pattern def sequence yet.  */
2882 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
/* Demote the current related pattern stmt into the def sequence and
   install a NOP_EXPR cast to TYPE of its result as the new related
   stmt; return the cast's lhs.  */
2883 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2884 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2885 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2886 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2887 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2888 return gimple_assign_lhs (cast_stmt);
2892 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2893 recursively. VAR is an SSA_NAME that should be transformed from bool
2894 to a wider integer type, OUT_TYPE is the desired final integer type of
2895 the whole pattern, TRUEVAL should be NULL unless optimizing
2896 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2897 in the then_clause, STMTS is where statements with added pattern stmts
2898 should be pushed to. */
2900 static tree
2901 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2902 vec<gimple> *stmts)
2904 gimple stmt = SSA_NAME_DEF_STMT (var);
2905 enum tree_code rhs_code, def_rhs_code;
2906 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2907 location_t loc;
2908 gimple pattern_stmt, def_stmt;
2910 rhs1 = gimple_assign_rhs1 (stmt);
2911 rhs2 = gimple_assign_rhs2 (stmt);
2912 rhs_code = gimple_assign_rhs_code (stmt);
2913 loc = gimple_location (stmt);
2914 switch (rhs_code)
/* Copies and bool-preserving casts: adjust the operand and copy the
   widened value through.  */
2916 case SSA_NAME:
2917 CASE_CONVERT:
2918 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2919 itype = TREE_TYPE (irhs1);
2920 pattern_stmt
2921 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2922 SSA_NAME, irhs1);
2923 break;
/* b = ~a on booleans becomes b' = a' ^ 1 on the widened value.  */
2925 case BIT_NOT_EXPR:
2926 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2927 itype = TREE_TYPE (irhs1);
2928 pattern_stmt
2929 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2930 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
2931 break;
2933 case BIT_AND_EXPR:
2934 /* Try to optimize x = y & (a < b ? 1 : 0); into
2935 x = (a < b ? y : 0);
2937 E.g. for:
2938 bool a_b, b_b, c_b;
2939 TYPE d_T;
2941 S1 a_b = x1 CMP1 y1;
2942 S2 b_b = x2 CMP2 y2;
2943 S3 c_b = a_b & b_b;
2944 S4 d_T = (TYPE) c_b;
2946 we would normally emit:
2948 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2949 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2950 S3' c_T = a_T & b_T;
2951 S4' d_T = c_T;
2953 but we can save one stmt by using the
2954 result of one of the COND_EXPRs in the other COND_EXPR and leave
2955 BIT_AND_EXPR stmt out:
2957 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2958 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2959 S4' f_T = c_T;
2961 At least when VEC_COND_EXPR is implemented using masks
2962 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2963 computes the comparison masks and ands it, in one case with
2964 all ones vector, in the other case with a vector register.
2965 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2966 often more expensive. */
2967 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2968 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2969 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2971 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2972 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2973 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2974 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
/* Fold the comparison into this COND_EXPR: pop the comparison's
   own pattern stmt, take over its related-stmt slot and return
   the merged value directly.  */
2976 gimple tstmt;
2977 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2978 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2979 tstmt = stmts->pop ();
2980 gcc_assert (tstmt == def_stmt);
2981 stmts->quick_push (stmt);
2982 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2983 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2984 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2985 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2986 return irhs2;
2988 else
2989 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2990 goto and_ior_xor;
/* Same optimization with the comparison on the first operand.  */
2992 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2993 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2994 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2996 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2997 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2998 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2999 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3001 gimple tstmt;
3002 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3003 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3004 tstmt = stmts->pop ();
3005 gcc_assert (tstmt == def_stmt);
3006 stmts->quick_push (stmt);
3007 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3008 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3009 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3010 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3011 return irhs1;
3013 else
3014 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3015 goto and_ior_xor;
3017 /* FALLTHRU */
3018 case BIT_IOR_EXPR:
3019 case BIT_XOR_EXPR:
3020 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3021 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3022 and_ior_xor:
/* If the adjusted operands' precisions differ, cast the one whose
   precision is farther from OUT_TYPE's toward the other (or both to
   OUT_TYPE on a tie) so the binary op is well-typed.  */
3023 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3024 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3026 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3027 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3028 int out_prec = TYPE_PRECISION (out_type);
3029 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3030 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3031 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3032 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3033 else
3035 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3036 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3039 itype = TREE_TYPE (irhs1);
3040 pattern_stmt
3041 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3042 rhs_code, irhs1, irhs2);
3043 break;
/* A leaf comparison: turn it into cmp ? trueval : 0 in an integer type
   whose precision matches the comparison operand's mode.  */
3045 default:
3046 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3047 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3048 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3049 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3050 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3052 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3053 itype
3054 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3056 else
3057 itype = TREE_TYPE (rhs1);
3058 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3059 if (trueval == NULL_TREE)
3060 trueval = build_int_cst (itype, 1);
3061 else
3062 gcc_checking_assert (useless_type_conversion_p (itype,
3063 TREE_TYPE (trueval)));
3064 pattern_stmt
3065 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3066 COND_EXPR, cond_expr, trueval,
3067 build_int_cst (itype, 0));
3068 break;
/* Record STMT as processed and make PATTERN_STMT its replacement.  */
3071 stmts->safe_push (stmt);
3072 gimple_set_location (pattern_stmt, loc);
3073 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3074 return gimple_assign_lhs (pattern_stmt);
3078 /* Function vect_recog_bool_pattern
3080 Try to find pattern like following:
3082 bool a_b, b_b, c_b, d_b, e_b;
3083 TYPE f_T;
3084 loop:
3085 S1 a_b = x1 CMP1 y1;
3086 S2 b_b = x2 CMP2 y2;
3087 S3 c_b = a_b & b_b;
3088 S4 d_b = x3 CMP3 y3;
3089 S5 e_b = c_b | d_b;
3090 S6 f_T = (TYPE) e_b;
3092 where type 'TYPE' is an integral type. Or a similar pattern
3093 ending in
3095 S6 f_Y = e_b ? r_Y : s_Y;
3097 as results from if-conversion of a complex condition.
3099 Input:
3101 * LAST_STMT: A stmt at the end from which the pattern
3102 search begins, i.e. cast of a bool to
3103 an integer type.
3105 Output:
3107 * TYPE_IN: The type of the input arguments to the pattern.
3109 * TYPE_OUT: The type of the output of this pattern.
3111 * Return value: A new stmt that will be used to replace the pattern.
3113 Assuming size of TYPE is the same as size of all comparisons
3114 (otherwise some casts would be added where needed), the above
3115 sequence we create related pattern stmts:
3116 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3117 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3118 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3119 S5' e_T = c_T | d_T;
3120 S6' f_T = e_T;
3122 Instead of the above S3' we could emit:
3123 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3124 S3' c_T = a_T | b_T;
3125 but the above is more efficient. */
/* Helper for the pattern machinery: recognize casts / COND_EXPRs / stores
   of a boolean SSA name and rewrite the whole boolean computation chain
   into integer arithmetic the vectorizer can handle (see the block
   comment above for the S1..S6 example).

   Input:
   * STMTS: the worklist; the last element is the stmt the search starts
     from (popped here, pushed back on success).

   Output:
   * TYPE_IN / TYPE_OUT: vector type of the pattern's input and output.
   * Return value: the new pattern stmt replacing the sequence, or NULL
     if the stmt does not match any of the three recognized shapes.  */

static gimple
vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
			 tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  /* The source operand must be a boolean: either BOOLEAN_TYPE or a
     1-bit unsigned integral type.  */
  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      /* Case 1: f_T = (TYPE) e_b; -- cast of a bool chain to an
	 integer type wider than 1 bit.  */
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      /* Rewrite the boolean chain into TYPE arithmetic; the pattern
	 def stmts are appended to STMTS by adjust_bool_pattern.  */
      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      else
	pattern_stmt
	  = gimple_build_assign (lhs, NOP_EXPR, rhs);
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
	   && TREE_CODE (var) == SSA_NAME)
    {
      /* Case 2: f_Y = e_b ? r_Y : s_Y; -- COND_EXPR selected by a
	 bool chain, as produced by if-conversion.  */
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      /* Build a scalar type for the boolean result that when
	 vectorized matches the vector type of the result in
	 size and number of elements.  */
      unsigned prec
	= wi::udiv_trunc (TYPE_SIZE (vectype),
			  TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
      tree type
	= build_nonstandard_integer_type (prec,
					  TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      /* Re-express the condition as RHS != 0 so the COND_EXPR selects
	 on the integer-ized boolean.  */
      pattern_stmt
	= gimple_build_assign (lhs, COND_EXPR,
			       build2 (NE_EXPR, boolean_type_node,
				       rhs, build_int_cst (type, 0)),
			       gimple_assign_rhs2 (last_stmt),
			       gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      /* Case 3: a store of a boolean SSA name to memory; the stmt has
	 a data reference.  The pattern stmt needs its own stmt_vec_info
	 carrying a copy of the data-reference fields.  */
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;
      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      /* Store through a view-convert so the vectorized store uses the
	 vector element type.  */
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      /* Transfer the data-reference info from the original store to the
	 pattern stmt, and redirect the DR back at the pattern stmt.  */
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_recog_bool_pattern: detected:\n");
      return pattern_stmt;
    }
  else
    return NULL;
}
3266 /* Mark statements that are involved in a pattern. */
/* Mark statements that are involved in a pattern: cross-link the
   original stmt ORIG_STMT and its replacement PATTERN_STMT through
   their RELATED_STMT fields, give the pattern stmt (and any stmts in
   its pattern def sequence) a stmt_vec_info with vector type
   PATTERN_VECTYPE, and mark ORIG_STMT as being inside a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
  gimple def_stmt;

  /* The pattern stmt may already have a stmt_vec_info (e.g. created by
     the recognizer itself); only allocate one if it does not.  */
  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  /* Link pattern stmt and original stmt to each other; the pattern stmt
     inherits the def type and pattern def sequence of the original.  */
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      /* Give each stmt of the pattern def sequence its own
	 stmt_vec_info, all related back to ORIG_STMT.  */
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
						 bb_vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}
3318 /* Function vect_pattern_recog_1
3320 Input:
3321 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3322 computation pattern.
3323 STMT: A stmt from which the pattern search should start.
3325 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3326 expression that computes the same functionality and can be used to
3327 replace the sequence of stmts that are involved in the pattern.
3329 Output:
3330 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3331 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3332 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3333 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3334 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3335 to the available target pattern.
3337 This function also does some bookkeeping, as explained in the documentation
3338 for vect_recog_pattern. */
/* Run one pattern recognizer VECT_RECOG_FUNC on the stmt at iterator SI
   and, if a pattern is found and the target supports it in vector form,
   do the stmt_vec_info bookkeeping (see the block comment above for the
   full contract).  STMTS_TO_REPLACE is scratch storage reused across
   calls; on return it holds the stmts the recognizer consumed.  */

static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple> *stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  /* The recognizer may have walked up the use-def chain; the last
     element is the stmt the pattern actually ends at.  */
  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
 
  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      /* The pattern stmt's operation must have an optab whose insn
	 produces the expected output mode.  */
      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern. */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
        LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "additional pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
3443 /* Function vect_pattern_recog
3445 Input:
3446 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3447 computation idioms.
3449 Output - for each computation idiom that is detected we create a new stmt
3450 that provides the same functionality and that can be vectorized. We
3451 also record some information in the struct_stmt_info of the relevant
3452 stmts, as explained below:
3454 At the entry to this function we have the following stmts, with the
3455 following initial value in the STMT_VINFO fields:
3457 stmt in_pattern_p related_stmt vec_stmt
3458 S1: a_i = .... - - -
3459 S2: a_2 = ..use(a_i).. - - -
3460 S3: a_1 = ..use(a_2).. - - -
3461 S4: a_0 = ..use(a_1).. - - -
3462 S5: ... = ..use(a_0).. - - -
3464 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3465 represented by a single stmt. We then:
3466 - create a new stmt S6 equivalent to the pattern (the stmt is not
3467 inserted into the code)
3468 - fill in the STMT_VINFO fields as follows:
3470 in_pattern_p related_stmt vec_stmt
3471 S1: a_i = .... - - -
3472 S2: a_2 = ..use(a_i).. - - -
3473 S3: a_1 = ..use(a_2).. - - -
3474 S4: a_0 = ..use(a_1).. true S6 -
3475 '---> S6: a_new = .... - S4 -
3476 S5: ... = ..use(a_0).. - - -
3478 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3479 to each other through the RELATED_STMT field).
3481 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3482 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3483 remain irrelevant unless used by stmts other than S4.
3485 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3486 (because they are marked as irrelevant). It will vectorize S6, and record
3487 a pointer to the new vector stmt VS6 from S6 (as usual).
3488 S4 will be skipped, and S5 will be vectorized as usual:
3490 in_pattern_p related_stmt vec_stmt
3491 S1: a_i = .... - - -
3492 S2: a_2 = ..use(a_i).. - - -
3493 S3: a_1 = ..use(a_2).. - - -
3494 > VS6: va_new = .... - - -
3495 S4: a_0 = ..use(a_1).. true S6 VS6
3496 '---> S6: a_new = .... - S4 VS6
3497 > VS5: ... = ..vuse(va_new).. - - -
3498 S5: ... = ..use(a_0).. - - -
3500 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3501 elsewhere), and we'll end up with:
3503 VS6: va_new = ....
3504 VS5: ... = ..vuse(va_new)..
3506 In case of more than one pattern statements, e.g., widen-mult with
3507 intermediate type:
3509 S1 a_t = ;
3510 S2 a_T = (TYPE) a_t;
3511 '--> S3: a_it = (interm_type) a_t;
3512 S4 prod_T = a_T * CONST;
3513 '--> S5: prod_T' = a_it w* CONST;
3515 there may be other users of a_T outside the pattern. In that case S2 will
3516 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3517 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3518 be recorded in S3. */
3520 void
3521 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3523 struct loop *loop;
3524 basic_block *bbs;
3525 unsigned int nbbs;
3526 gimple_stmt_iterator si;
3527 unsigned int i, j;
3528 vect_recog_func_ptr vect_recog_func;
3529 auto_vec<gimple, 1> stmts_to_replace;
3530 gimple stmt;
3532 if (dump_enabled_p ())
3533 dump_printf_loc (MSG_NOTE, vect_location,
3534 "=== vect_pattern_recog ===\n");
3536 if (loop_vinfo)
3538 loop = LOOP_VINFO_LOOP (loop_vinfo);
3539 bbs = LOOP_VINFO_BBS (loop_vinfo);
3540 nbbs = loop->num_nodes;
3542 else
3544 bbs = &BB_VINFO_BB (bb_vinfo);
3545 nbbs = 1;
3548 /* Scan through the loop stmts, applying the pattern recognition
3549 functions starting at each stmt visited: */
3550 for (i = 0; i < nbbs; i++)
3552 basic_block bb = bbs[i];
3553 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3555 if (bb_vinfo && (stmt = gsi_stmt (si))
3556 && vinfo_for_stmt (stmt)
3557 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3558 continue;
3560 /* Scan over all generic vect_recog_xxx_pattern functions. */
3561 for (j = 0; j < NUM_PATTERNS; j++)
3563 vect_recog_func = vect_vect_recog_func_ptrs[j];
3564 vect_pattern_recog_1 (vect_recog_func, si,
3565 &stmts_to_replace);