2015-06-11 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / gcc / tree-vect-patterns.c
blob786adf29ee0595b4ae447a219e5749e9c4ec1525
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "input.h"
26 #include "alias.h"
27 #include "symtab.h"
28 #include "tree.h"
29 #include "fold-const.h"
30 #include "stor-layout.h"
31 #include "target.h"
32 #include "predict.h"
33 #include "hard-reg-set.h"
34 #include "function.h"
35 #include "dominance.h"
36 #include "basic-block.h"
37 #include "gimple-pretty-print.h"
38 #include "tree-ssa-alias.h"
39 #include "internal-fn.h"
40 #include "tree-eh.h"
41 #include "gimple-expr.h"
42 #include "is-a.h"
43 #include "gimple.h"
44 #include "gimplify.h"
45 #include "gimple-iterator.h"
46 #include "gimple-ssa.h"
47 #include "tree-phinodes.h"
48 #include "ssa-iterators.h"
49 #include "stringpool.h"
50 #include "tree-ssanames.h"
51 #include "cfgloop.h"
52 #include "rtl.h"
53 #include "flags.h"
54 #include "insn-config.h"
55 #include "expmed.h"
56 #include "dojump.h"
57 #include "explow.h"
58 #include "calls.h"
59 #include "emit-rtl.h"
60 #include "varasm.h"
61 #include "stmt.h"
62 #include "expr.h"
63 #include "insn-codes.h"
64 #include "optabs.h"
65 #include "params.h"
66 #include "tree-data-ref.h"
67 #include "tree-vectorizer.h"
68 #include "recog.h" /* FIXME: for insn_data */
69 #include "diagnostic-core.h"
70 #include "dumpfile.h"
71 #include "builtins.h"
73 /* Pattern recognition functions */
74 static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
75 tree *);
76 static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
77 tree *);
78 static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
79 tree *);
80 static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
81 tree *);
82 static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
83 static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
84 tree *);
85 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
86 tree *, tree *);
87 static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
88 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
89 tree *, tree *);
90 static gimple vect_recog_divmod_pattern (vec<gimple> *,
91 tree *, tree *);
92 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
93 tree *, tree *);
94 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
95 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
96 vect_recog_widen_mult_pattern,
97 vect_recog_widen_sum_pattern,
98 vect_recog_dot_prod_pattern,
99 vect_recog_sad_pattern,
100 vect_recog_pow_pattern,
101 vect_recog_widen_shift_pattern,
102 vect_recog_over_widening_pattern,
103 vect_recog_rotate_pattern,
104 vect_recog_vector_vector_shift_pattern,
105 vect_recog_divmod_pattern,
106 vect_recog_mixed_size_cond_pattern,
107 vect_recog_bool_pattern};
109 static inline void
110 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
112 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
113 stmt);
116 static inline void
117 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
119 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
120 append_pattern_def_seq (stmt_info, stmt);
123 /* Check whether STMT2 is in the same loop or basic block as STMT1.
124 Which of the two applies depends on whether we're currently doing
125 loop-based or basic-block-based vectorization, as determined by
126 the vinfo_for_stmt for STMT1 (which must be defined).
128 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
129 to be defined as well. */
131 static bool
132 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
134 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
135 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
136 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
138 if (!gimple_bb (stmt2))
139 return false;
141 if (loop_vinfo)
143 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
144 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
145 return false;
147 else
149 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
150 || gimple_code (stmt2) == GIMPLE_PHI)
151 return false;
154 gcc_assert (vinfo_for_stmt (stmt2));
155 return true;
158 /* If the LHS of DEF_STMT has a single use, and that statement is
159 in the same loop or basic block, return it. */
161 static gimple
162 vect_single_imm_use (gimple def_stmt)
164 tree lhs = gimple_assign_lhs (def_stmt);
165 use_operand_p use_p;
166 gimple use_stmt;
168 if (!single_imm_use (lhs, &use_p, &use_stmt))
169 return NULL;
171 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
172 return NULL;
174 return use_stmt;
177 /* Check whether NAME, an ssa-name used in USE_STMT,
178 is a result of a type promotion, such that:
179 DEF_STMT: NAME = NOP (name0)
180 If CHECK_SIGN is TRUE, check that either both types are signed or both are
181 unsigned. */
183 static bool
184 type_conversion_p (tree name, gimple use_stmt, bool check_sign,
185 tree *orig_type, gimple *def_stmt, bool *promotion)
187 tree dummy;
188 gimple dummy_gimple;
189 loop_vec_info loop_vinfo;
190 stmt_vec_info stmt_vinfo;
191 tree type = TREE_TYPE (name);
192 tree oprnd0;
193 enum vect_def_type dt;
194 tree def;
195 bb_vec_info bb_vinfo;
197 stmt_vinfo = vinfo_for_stmt (use_stmt);
198 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
199 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
200 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
201 &def, &dt))
202 return false;
204 if (dt != vect_internal_def
205 && dt != vect_external_def && dt != vect_constant_def)
206 return false;
208 if (!*def_stmt)
209 return false;
211 if (!is_gimple_assign (*def_stmt))
212 return false;
214 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
215 return false;
217 oprnd0 = gimple_assign_rhs1 (*def_stmt);
219 *orig_type = TREE_TYPE (oprnd0);
220 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
221 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
222 return false;
224 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
225 *promotion = true;
226 else
227 *promotion = false;
229 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
230 bb_vinfo, &dummy_gimple, &dummy, &dt))
231 return false;
233 return true;
236 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
237 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
239 static tree
240 vect_recog_temp_ssa_var (tree type, gimple stmt)
242 return make_temp_ssa_name (type, stmt, "patt");
245 /* Function vect_recog_dot_prod_pattern
247 Try to find the following pattern:
249 type x_t, y_t;
250 TYPE1 prod;
251 TYPE2 sum = init;
252 loop:
253 sum_0 = phi <init, sum_1>
254 S1 x_t = ...
255 S2 y_t = ...
256 S3 x_T = (TYPE1) x_t;
257 S4 y_T = (TYPE1) y_t;
258 S5 prod = x_T * y_T;
259 [S6 prod = (TYPE2) prod; #optional]
260 S7 sum_1 = prod + sum_0;
262 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
263 same size of 'TYPE1' or bigger. This is a special case of a reduction
264 computation.
266 Input:
268 * STMTS: Contains a stmt from which the pattern search begins. In the
269 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
270 will be detected.
272 Output:
274 * TYPE_IN: The type of the input arguments to the pattern.
276 * TYPE_OUT: The type of the output of this pattern.
278 * Return value: A new stmt that will be used to replace the sequence of
279 stmts that constitute the pattern. In this case it will be:
280 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
282 Note: The dot-prod idiom is a widening reduction pattern that is
283 vectorized without preserving all the intermediate results. It
284 produces only N/2 (widened) results (by summing up pairs of
285 intermediate results) rather than all N results. Therefore, we
286 cannot allow this pattern when we want to get all the results and in
287 the correct order (as is the case when this computation is in an
288 inner-loop nested in an outer-loop that us being vectorized). */
290 static gimple
291 vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
292 tree *type_out)
294 gimple stmt, last_stmt = (*stmts)[0];
295 tree oprnd0, oprnd1;
296 tree oprnd00, oprnd01;
297 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
298 tree type, half_type;
299 gimple pattern_stmt;
300 tree prod_type;
301 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
302 struct loop *loop;
303 tree var;
304 bool promotion;
306 if (!loop_info)
307 return NULL;
309 loop = LOOP_VINFO_LOOP (loop_info);
311 /* We don't allow changing the order of the computation in the inner-loop
312 when doing outer-loop vectorization. */
313 if (loop && nested_in_vect_loop_p (loop, last_stmt))
314 return NULL;
316 if (!is_gimple_assign (last_stmt))
317 return NULL;
319 type = gimple_expr_type (last_stmt);
321 /* Look for the following pattern
322 DX = (TYPE1) X;
323 DY = (TYPE1) Y;
324 DPROD = DX * DY;
325 DDPROD = (TYPE2) DPROD;
326 sum_1 = DDPROD + sum_0;
327 In which
328 - DX is double the size of X
329 - DY is double the size of Y
330 - DX, DY, DPROD all have the same type
331 - sum is the same size of DPROD or bigger
332 - sum has been recognized as a reduction variable.
334 This is equivalent to:
335 DPROD = X w* Y; #widen mult
336 sum_1 = DPROD w+ sum_0; #widen summation
338 DPROD = X w* Y; #widen mult
339 sum_1 = DPROD + sum_0; #summation
342 /* Starting from LAST_STMT, follow the defs of its uses in search
343 of the above pattern. */
345 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
346 return NULL;
348 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
350 /* Has been detected as widening-summation? */
352 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
353 type = gimple_expr_type (stmt);
354 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
355 return NULL;
356 oprnd0 = gimple_assign_rhs1 (stmt);
357 oprnd1 = gimple_assign_rhs2 (stmt);
358 half_type = TREE_TYPE (oprnd0);
360 else
362 gimple def_stmt;
364 oprnd0 = gimple_assign_rhs1 (last_stmt);
365 oprnd1 = gimple_assign_rhs2 (last_stmt);
366 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
367 || !types_compatible_p (TREE_TYPE (oprnd1), type))
368 return NULL;
369 stmt = last_stmt;
371 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
372 &promotion)
373 && promotion)
375 stmt = def_stmt;
376 oprnd0 = gimple_assign_rhs1 (stmt);
378 else
379 half_type = type;
382 /* So far so good. Since last_stmt was detected as a (summation) reduction,
383 we know that oprnd1 is the reduction variable (defined by a loop-header
384 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
385 Left to check that oprnd0 is defined by a (widen_)mult_expr */
386 if (TREE_CODE (oprnd0) != SSA_NAME)
387 return NULL;
389 prod_type = half_type;
390 stmt = SSA_NAME_DEF_STMT (oprnd0);
392 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
393 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
394 return NULL;
396 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
397 inside the loop (in case we are analyzing an outer-loop). */
398 if (!is_gimple_assign (stmt))
399 return NULL;
400 stmt_vinfo = vinfo_for_stmt (stmt);
401 gcc_assert (stmt_vinfo);
402 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
403 return NULL;
404 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
405 return NULL;
406 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
408 /* Has been detected as a widening multiplication? */
410 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
411 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
412 return NULL;
413 stmt_vinfo = vinfo_for_stmt (stmt);
414 gcc_assert (stmt_vinfo);
415 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
416 oprnd00 = gimple_assign_rhs1 (stmt);
417 oprnd01 = gimple_assign_rhs2 (stmt);
418 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
419 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
421 else
423 tree half_type0, half_type1;
424 gimple def_stmt;
425 tree oprnd0, oprnd1;
427 oprnd0 = gimple_assign_rhs1 (stmt);
428 oprnd1 = gimple_assign_rhs2 (stmt);
429 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
430 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
431 return NULL;
432 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
433 &promotion)
434 || !promotion)
435 return NULL;
436 oprnd00 = gimple_assign_rhs1 (def_stmt);
437 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
438 &promotion)
439 || !promotion)
440 return NULL;
441 oprnd01 = gimple_assign_rhs1 (def_stmt);
442 if (!types_compatible_p (half_type0, half_type1))
443 return NULL;
444 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
445 return NULL;
448 half_type = TREE_TYPE (oprnd00);
449 *type_in = half_type;
450 *type_out = type;
452 /* Pattern detected. Create a stmt to be used to replace the pattern: */
453 var = vect_recog_temp_ssa_var (type, NULL);
454 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
455 oprnd00, oprnd01, oprnd1);
457 if (dump_enabled_p ())
459 dump_printf_loc (MSG_NOTE, vect_location,
460 "vect_recog_dot_prod_pattern: detected: ");
461 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
462 dump_printf (MSG_NOTE, "\n");
465 return pattern_stmt;
469 /* Function vect_recog_sad_pattern
471 Try to find the following Sum of Absolute Difference (SAD) pattern:
473 type x_t, y_t;
474 signed TYPE1 diff, abs_diff;
475 TYPE2 sum = init;
476 loop:
477 sum_0 = phi <init, sum_1>
478 S1 x_t = ...
479 S2 y_t = ...
480 S3 x_T = (TYPE1) x_t;
481 S4 y_T = (TYPE1) y_t;
482 S5 diff = x_T - y_T;
483 S6 abs_diff = ABS_EXPR <diff>;
484 [S7 abs_diff = (TYPE2) abs_diff; #optional]
485 S8 sum_1 = abs_diff + sum_0;
487 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
488 same size of 'TYPE1' or bigger. This is a special case of a reduction
489 computation.
491 Input:
493 * STMTS: Contains a stmt from which the pattern search begins. In the
494 example, when this function is called with S8, the pattern
495 {S3,S4,S5,S6,S7,S8} will be detected.
497 Output:
499 * TYPE_IN: The type of the input arguments to the pattern.
501 * TYPE_OUT: The type of the output of this pattern.
503 * Return value: A new stmt that will be used to replace the sequence of
504 stmts that constitute the pattern. In this case it will be:
505 SAD_EXPR <x_t, y_t, sum_0>
508 static gimple
509 vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
510 tree *type_out)
512 gimple last_stmt = (*stmts)[0];
513 tree sad_oprnd0, sad_oprnd1;
514 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
515 tree half_type;
516 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
517 struct loop *loop;
518 bool promotion;
520 if (!loop_info)
521 return NULL;
523 loop = LOOP_VINFO_LOOP (loop_info);
525 /* We don't allow changing the order of the computation in the inner-loop
526 when doing outer-loop vectorization. */
527 if (loop && nested_in_vect_loop_p (loop, last_stmt))
528 return NULL;
530 if (!is_gimple_assign (last_stmt))
531 return NULL;
533 tree sum_type = gimple_expr_type (last_stmt);
535 /* Look for the following pattern
536 DX = (TYPE1) X;
537 DY = (TYPE1) Y;
538 DDIFF = DX - DY;
539 DAD = ABS_EXPR <DDIFF>;
540 DDPROD = (TYPE2) DPROD;
541 sum_1 = DAD + sum_0;
542 In which
543 - DX is at least double the size of X
544 - DY is at least double the size of Y
545 - DX, DY, DDIFF, DAD all have the same type
546 - sum is the same size of DAD or bigger
547 - sum has been recognized as a reduction variable.
549 This is equivalent to:
550 DDIFF = X w- Y; #widen sub
551 DAD = ABS_EXPR <DDIFF>;
552 sum_1 = DAD w+ sum_0; #widen summation
554 DDIFF = X w- Y; #widen sub
555 DAD = ABS_EXPR <DDIFF>;
556 sum_1 = DAD + sum_0; #summation
559 /* Starting from LAST_STMT, follow the defs of its uses in search
560 of the above pattern. */
562 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
563 return NULL;
565 tree plus_oprnd0, plus_oprnd1;
567 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
569 /* Has been detected as widening-summation? */
571 gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
572 sum_type = gimple_expr_type (stmt);
573 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
574 return NULL;
575 plus_oprnd0 = gimple_assign_rhs1 (stmt);
576 plus_oprnd1 = gimple_assign_rhs2 (stmt);
577 half_type = TREE_TYPE (plus_oprnd0);
579 else
581 gimple def_stmt;
583 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
584 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
585 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
586 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
587 return NULL;
589 /* The type conversion could be promotion, demotion,
590 or just signed -> unsigned. */
591 if (type_conversion_p (plus_oprnd0, last_stmt, false,
592 &half_type, &def_stmt, &promotion))
593 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
594 else
595 half_type = sum_type;
598 /* So far so good. Since last_stmt was detected as a (summation) reduction,
599 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
600 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
601 Then check that plus_oprnd0 is defined by an abs_expr. */
603 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
604 return NULL;
606 tree abs_type = half_type;
607 gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
609 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
610 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
611 return NULL;
613 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
614 inside the loop (in case we are analyzing an outer-loop). */
615 if (!is_gimple_assign (abs_stmt))
616 return NULL;
618 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
619 gcc_assert (abs_stmt_vinfo);
620 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
621 return NULL;
622 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
623 return NULL;
625 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
626 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
627 return NULL;
628 if (TYPE_UNSIGNED (abs_type))
629 return NULL;
631 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
633 if (TREE_CODE (abs_oprnd) != SSA_NAME)
634 return NULL;
636 gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
638 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
639 if (!gimple_bb (diff_stmt)
640 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
641 return NULL;
643 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
644 inside the loop (in case we are analyzing an outer-loop). */
645 if (!is_gimple_assign (diff_stmt))
646 return NULL;
648 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
649 gcc_assert (diff_stmt_vinfo);
650 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
651 return NULL;
652 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
653 return NULL;
655 tree half_type0, half_type1;
656 gimple def_stmt;
658 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
659 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
661 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
662 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
663 return NULL;
664 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
665 &half_type0, &def_stmt, &promotion)
666 || !promotion)
667 return NULL;
668 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
670 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
671 &half_type1, &def_stmt, &promotion)
672 || !promotion)
673 return NULL;
674 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
676 if (!types_compatible_p (half_type0, half_type1))
677 return NULL;
678 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
679 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
680 return NULL;
682 *type_in = TREE_TYPE (sad_oprnd0);
683 *type_out = sum_type;
685 /* Pattern detected. Create a stmt to be used to replace the pattern: */
686 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
687 gimple pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
688 sad_oprnd1, plus_oprnd1);
690 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location,
693 "vect_recog_sad_pattern: detected: ");
694 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
695 dump_printf (MSG_NOTE, "\n");
698 return pattern_stmt;
702 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
703 and LSHIFT_EXPR.
705 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
706 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
708 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
709 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
710 that satisfies the above restrictions, we can perform a widening opeartion
711 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
712 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
714 static bool
715 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
716 tree const_oprnd, tree *oprnd,
717 gimple *wstmt, tree type,
718 tree *half_type, gimple def_stmt)
720 tree new_type, new_oprnd;
722 if (code != MULT_EXPR && code != LSHIFT_EXPR)
723 return false;
725 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
726 || (code == LSHIFT_EXPR
727 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
728 != 1))
729 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
731 /* CONST_OPRND is a constant of HALF_TYPE. */
732 *oprnd = gimple_assign_rhs1 (def_stmt);
733 return true;
736 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
737 return false;
739 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
740 return false;
742 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
743 a type 2 times bigger than HALF_TYPE. */
744 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
745 TYPE_UNSIGNED (type));
746 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
747 || (code == LSHIFT_EXPR
748 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
749 return false;
751 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
752 *oprnd = gimple_assign_rhs1 (def_stmt);
753 new_oprnd = make_ssa_name (new_type);
754 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
755 *oprnd = new_oprnd;
757 *half_type = new_type;
758 return true;
762 /* Function vect_recog_widen_mult_pattern
764 Try to find the following pattern:
766 type1 a_t;
767 type2 b_t;
768 TYPE a_T, b_T, prod_T;
770 S1 a_t = ;
771 S2 b_t = ;
772 S3 a_T = (TYPE) a_t;
773 S4 b_T = (TYPE) b_t;
774 S5 prod_T = a_T * b_T;
776 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
778 Also detect unsigned cases:
780 unsigned type1 a_t;
781 unsigned type2 b_t;
782 unsigned TYPE u_prod_T;
783 TYPE a_T, b_T, prod_T;
785 S1 a_t = ;
786 S2 b_t = ;
787 S3 a_T = (TYPE) a_t;
788 S4 b_T = (TYPE) b_t;
789 S5 prod_T = a_T * b_T;
790 S6 u_prod_T = (unsigned TYPE) prod_T;
792 and multiplication by constants:
794 type a_t;
795 TYPE a_T, prod_T;
797 S1 a_t = ;
798 S3 a_T = (TYPE) a_t;
799 S5 prod_T = a_T * CONST;
801 A special case of multiplication by constants is when 'TYPE' is 4 times
802 bigger than 'type', but CONST fits an intermediate type 2 times smaller
803 than 'TYPE'. In that case we create an additional pattern stmt for S3
804 to create a variable of the intermediate type, and perform widen-mult
805 on the intermediate type as well:
807 type a_t;
808 interm_type a_it;
809 TYPE a_T, prod_T, prod_T';
811 S1 a_t = ;
812 S3 a_T = (TYPE) a_t;
813 '--> a_it = (interm_type) a_t;
814 S5 prod_T = a_T * CONST;
815 '--> prod_T' = a_it w* CONST;
817 Input/Output:
819 * STMTS: Contains a stmt from which the pattern search begins. In the
820 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
821 is detected. In case of unsigned widen-mult, the original stmt (S5) is
822 replaced with S6 in STMTS. In case of multiplication by a constant
823 of an intermediate type (the last case above), STMTS also contains S3
824 (inserted before S5).
826 Output:
828 * TYPE_IN: The type of the input arguments to the pattern.
830 * TYPE_OUT: The type of the output of this pattern.
832 * Return value: A new stmt that will be used to replace the sequence of
833 stmts that constitute the pattern. In this case it will be:
834 WIDEN_MULT <a_t, b_t>
835 If the result of WIDEN_MULT needs to be converted to a larger type, the
836 returned stmt will be this type conversion stmt.
839 static gimple
840 vect_recog_widen_mult_pattern (vec<gimple> *stmts,
841 tree *type_in, tree *type_out)
843 gimple last_stmt = stmts->pop ();
844 gimple def_stmt0, def_stmt1;
845 tree oprnd0, oprnd1;
846 tree type, half_type0, half_type1;
847 gimple new_stmt = NULL, pattern_stmt = NULL;
848 tree vectype, vecitype;
849 tree var;
850 enum tree_code dummy_code;
851 int dummy_int;
852 vec<tree> dummy_vec;
853 bool op1_ok;
854 bool promotion;
856 if (!is_gimple_assign (last_stmt))
857 return NULL;
859 type = gimple_expr_type (last_stmt);
861 /* Starting from LAST_STMT, follow the defs of its uses in search
862 of the above pattern. */
864 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
865 return NULL;
867 oprnd0 = gimple_assign_rhs1 (last_stmt);
868 oprnd1 = gimple_assign_rhs2 (last_stmt);
869 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
870 || !types_compatible_p (TREE_TYPE (oprnd1), type))
871 return NULL;
873 /* Check argument 0. */
874 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
875 &promotion)
876 || !promotion)
877 return NULL;
878 /* Check argument 1. */
879 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
880 &def_stmt1, &promotion);
882 if (op1_ok && promotion)
884 oprnd0 = gimple_assign_rhs1 (def_stmt0);
885 oprnd1 = gimple_assign_rhs1 (def_stmt1);
887 else
889 if (TREE_CODE (oprnd1) == INTEGER_CST
890 && TREE_CODE (half_type0) == INTEGER_TYPE
891 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
892 &oprnd0, &new_stmt, type,
893 &half_type0, def_stmt0))
895 half_type1 = half_type0;
896 oprnd1 = fold_convert (half_type1, oprnd1);
898 else
899 return NULL;
902 /* If the two arguments have different sizes, convert the one with
903 the smaller type into the larger type. */
904 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
906 /* If we already used up the single-stmt slot give up. */
907 if (new_stmt)
908 return NULL;
910 tree* oprnd = NULL;
911 gimple def_stmt = NULL;
913 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
915 def_stmt = def_stmt0;
916 half_type0 = half_type1;
917 oprnd = &oprnd0;
919 else
921 def_stmt = def_stmt1;
922 half_type1 = half_type0;
923 oprnd = &oprnd1;
926 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
927 tree new_oprnd = make_ssa_name (half_type0);
928 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
929 *oprnd = new_oprnd;
932 /* Handle unsigned case. Look for
933 S6 u_prod_T = (unsigned TYPE) prod_T;
934 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
935 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
937 gimple use_stmt;
938 tree use_lhs;
939 tree use_type;
941 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
942 return NULL;
944 use_stmt = vect_single_imm_use (last_stmt);
945 if (!use_stmt || !is_gimple_assign (use_stmt)
946 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
947 return NULL;
949 use_lhs = gimple_assign_lhs (use_stmt);
950 use_type = TREE_TYPE (use_lhs);
951 if (!INTEGRAL_TYPE_P (use_type)
952 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
953 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
954 return NULL;
956 type = use_type;
957 last_stmt = use_stmt;
960 if (!types_compatible_p (half_type0, half_type1))
961 return NULL;
963 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
964 to get an intermediate result of type ITYPE. In this case we need
965 to build a statement to convert this intermediate result to type TYPE. */
966 tree itype = type;
967 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
968 itype = build_nonstandard_integer_type
969 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
970 TYPE_UNSIGNED (type));
972 /* Pattern detected. */
973 if (dump_enabled_p ())
974 dump_printf_loc (MSG_NOTE, vect_location,
975 "vect_recog_widen_mult_pattern: detected:\n");
977 /* Check target support */
978 vectype = get_vectype_for_scalar_type (half_type0);
979 vecitype = get_vectype_for_scalar_type (itype);
980 if (!vectype
981 || !vecitype
982 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
983 vecitype, vectype,
984 &dummy_code, &dummy_code,
985 &dummy_int, &dummy_vec))
986 return NULL;
988 *type_in = vectype;
989 *type_out = get_vectype_for_scalar_type (type);
991 /* Pattern supported. Create a stmt to be used to replace the pattern: */
992 var = vect_recog_temp_ssa_var (itype, NULL);
993 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
995 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
996 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
997 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
998 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1000 /* If the original two operands have different sizes, we may need to convert
1001 the smaller one into the larget type. If this is the case, at this point
1002 the new stmt is already built. */
1003 if (new_stmt)
1005 append_pattern_def_seq (stmt_vinfo, new_stmt);
1006 stmt_vec_info new_stmt_info
1007 = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
1008 set_vinfo_for_stmt (new_stmt, new_stmt_info);
1009 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1012 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
1013 the result of the widen-mult operation into type TYPE. */
1014 if (itype != type)
1016 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1017 stmt_vec_info pattern_stmt_info
1018 = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
1019 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1020 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
1021 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
1022 NOP_EXPR,
1023 gimple_assign_lhs (pattern_stmt));
1026 if (dump_enabled_p ())
1027 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1029 stmts->safe_push (last_stmt);
1030 return pattern_stmt;
1034 /* Function vect_recog_pow_pattern
1036 Try to find the following pattern:
1038 x = POW (y, N);
1040 with POW being one of pow, powf, powi, powif and N being
1041 either 2 or 0.5.
1043 Input:
1045 * LAST_STMT: A stmt from which the pattern search begins.
1047 Output:
1049 * TYPE_IN: The type of the input arguments to the pattern.
1051 * TYPE_OUT: The type of the output of this pattern.
1053 * Return value: A new stmt that will be used to replace the sequence of
1054 stmts that constitute the pattern. In this case it will be:
1055 x = x * x
1057 x = sqrt (x)
1060 static gimple
1061 vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
1062 tree *type_out)
1064 gimple last_stmt = (*stmts)[0];
1065 tree fn, base, exp = NULL;
1066 gimple stmt;
1067 tree var;
1069 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1070 return NULL;
1072 fn = gimple_call_fndecl (last_stmt);
1073 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
1074 return NULL;
1076 switch (DECL_FUNCTION_CODE (fn))
1078 case BUILT_IN_POWIF:
1079 case BUILT_IN_POWI:
1080 case BUILT_IN_POWF:
1081 case BUILT_IN_POW:
1082 base = gimple_call_arg (last_stmt, 0);
1083 exp = gimple_call_arg (last_stmt, 1);
1084 if (TREE_CODE (exp) != REAL_CST
1085 && TREE_CODE (exp) != INTEGER_CST)
1086 return NULL;
1087 break;
1089 default:
1090 return NULL;
1093 /* We now have a pow or powi builtin function call with a constant
1094 exponent. */
1096 *type_out = NULL_TREE;
1098 /* Catch squaring. */
1099 if ((tree_fits_shwi_p (exp)
1100 && tree_to_shwi (exp) == 2)
1101 || (TREE_CODE (exp) == REAL_CST
1102 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
1104 *type_in = TREE_TYPE (base);
1106 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1107 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1108 return stmt;
1111 /* Catch square root. */
1112 if (TREE_CODE (exp) == REAL_CST
1113 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
1115 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
1116 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1117 if (*type_in)
1119 gcall *stmt = gimple_build_call (newfn, 1, base);
1120 if (vectorizable_function (stmt, *type_in, *type_in)
1121 != NULL_TREE)
1123 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1124 gimple_call_set_lhs (stmt, var);
1125 return stmt;
1130 return NULL;
1134 /* Function vect_recog_widen_sum_pattern
1136 Try to find the following pattern:
1138 type x_t;
1139 TYPE x_T, sum = init;
1140 loop:
1141 sum_0 = phi <init, sum_1>
1142 S1 x_t = *p;
1143 S2 x_T = (TYPE) x_t;
1144 S3 sum_1 = x_T + sum_0;
1146 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1147 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1148 a special case of a reduction computation.
1150 Input:
1152 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1153 when this function is called with S3, the pattern {S2,S3} will be detected.
1155 Output:
1157 * TYPE_IN: The type of the input arguments to the pattern.
1159 * TYPE_OUT: The type of the output of this pattern.
1161 * Return value: A new stmt that will be used to replace the sequence of
1162 stmts that constitute the pattern. In this case it will be:
1163 WIDEN_SUM <x_t, sum_0>
1165 Note: The widening-sum idiom is a widening reduction pattern that is
1166 vectorized without preserving all the intermediate results. It
1167 produces only N/2 (widened) results (by summing up pairs of
1168 intermediate results) rather than all N results. Therefore, we
1169 cannot allow this pattern when we want to get all the results and in
1170 the correct order (as is the case when this computation is in an
1171 inner-loop nested in an outer-loop that us being vectorized). */
1173 static gimple
1174 vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
1175 tree *type_out)
1177 gimple stmt, last_stmt = (*stmts)[0];
1178 tree oprnd0, oprnd1;
1179 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1180 tree type, half_type;
1181 gimple pattern_stmt;
1182 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1183 struct loop *loop;
1184 tree var;
1185 bool promotion;
1187 if (!loop_info)
1188 return NULL;
1190 loop = LOOP_VINFO_LOOP (loop_info);
1192 /* We don't allow changing the order of the computation in the inner-loop
1193 when doing outer-loop vectorization. */
1194 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1195 return NULL;
1197 if (!is_gimple_assign (last_stmt))
1198 return NULL;
1200 type = gimple_expr_type (last_stmt);
1202 /* Look for the following pattern
1203 DX = (TYPE) X;
1204 sum_1 = DX + sum_0;
1205 In which DX is at least double the size of X, and sum_1 has been
1206 recognized as a reduction variable.
1209 /* Starting from LAST_STMT, follow the defs of its uses in search
1210 of the above pattern. */
1212 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1213 return NULL;
1215 oprnd0 = gimple_assign_rhs1 (last_stmt);
1216 oprnd1 = gimple_assign_rhs2 (last_stmt);
1217 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1218 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1219 return NULL;
1221 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1222 we know that oprnd1 is the reduction variable (defined by a loop-header
1223 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1224 Left to check that oprnd0 is defined by a cast from type 'type' to type
1225 'TYPE'. */
1227 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1228 &promotion)
1229 || !promotion)
1230 return NULL;
1232 oprnd0 = gimple_assign_rhs1 (stmt);
1233 *type_in = half_type;
1234 *type_out = type;
1236 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1237 var = vect_recog_temp_ssa_var (type, NULL);
1238 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1240 if (dump_enabled_p ())
1242 dump_printf_loc (MSG_NOTE, vect_location,
1243 "vect_recog_widen_sum_pattern: detected: ");
1244 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1245 dump_printf (MSG_NOTE, "\n");
1248 return pattern_stmt;
1252 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1254 Input:
1255 STMT - a statement to check.
1256 DEF - we support operations with two operands, one of which is constant.
1257 The other operand can be defined by a demotion operation, or by a
1258 previous statement in a sequence of over-promoted operations. In the
1259 later case DEF is used to replace that operand. (It is defined by a
1260 pattern statement we created for the previous statement in the
1261 sequence).
1263 Input/output:
1264 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1265 NULL, it's the type of DEF.
1266 STMTS - additional pattern statements. If a pattern statement (type
1267 conversion) is created in this function, its original statement is
1268 added to STMTS.
1270 Output:
1271 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1272 operands to use in the new pattern statement for STMT (will be created
1273 in vect_recog_over_widening_pattern ()).
1274 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1275 statements for STMT: the first one is a type promotion and the second
1276 one is the operation itself. We return the type promotion statement
1277 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1278 the second pattern statement. */
1280 static bool
1281 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
1282 tree *op0, tree *op1, gimple *new_def_stmt,
1283 vec<gimple> *stmts)
1285 enum tree_code code;
1286 tree const_oprnd, oprnd;
1287 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1288 gimple def_stmt, new_stmt;
1289 bool first = false;
1290 bool promotion;
1292 *op0 = NULL_TREE;
1293 *op1 = NULL_TREE;
1294 *new_def_stmt = NULL;
1296 if (!is_gimple_assign (stmt))
1297 return false;
1299 code = gimple_assign_rhs_code (stmt);
1300 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1301 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1302 return false;
1304 oprnd = gimple_assign_rhs1 (stmt);
1305 const_oprnd = gimple_assign_rhs2 (stmt);
1306 type = gimple_expr_type (stmt);
1308 if (TREE_CODE (oprnd) != SSA_NAME
1309 || TREE_CODE (const_oprnd) != INTEGER_CST)
1310 return false;
1312 /* If oprnd has other uses besides that in stmt we cannot mark it
1313 as being part of a pattern only. */
1314 if (!has_single_use (oprnd))
1315 return false;
1317 /* If we are in the middle of a sequence, we use DEF from a previous
1318 statement. Otherwise, OPRND has to be a result of type promotion. */
1319 if (*new_type)
1321 half_type = *new_type;
1322 oprnd = def;
1324 else
1326 first = true;
1327 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1328 &promotion)
1329 || !promotion
1330 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1331 return false;
1334 /* Can we perform the operation on a smaller type? */
1335 switch (code)
1337 case BIT_IOR_EXPR:
1338 case BIT_XOR_EXPR:
1339 case BIT_AND_EXPR:
1340 if (!int_fits_type_p (const_oprnd, half_type))
1342 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1343 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1344 return false;
1346 interm_type = build_nonstandard_integer_type (
1347 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1348 if (!int_fits_type_p (const_oprnd, interm_type))
1349 return false;
1352 break;
1354 case LSHIFT_EXPR:
1355 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1356 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1357 return false;
1359 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1360 (e.g., if the original value was char, the shift amount is at most 8
1361 if we want to use short). */
1362 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1363 return false;
1365 interm_type = build_nonstandard_integer_type (
1366 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1368 if (!vect_supportable_shift (code, interm_type))
1369 return false;
1371 break;
1373 case RSHIFT_EXPR:
1374 if (vect_supportable_shift (code, half_type))
1375 break;
1377 /* Try intermediate type - HALF_TYPE is not supported. */
1378 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1379 return false;
1381 interm_type = build_nonstandard_integer_type (
1382 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1384 if (!vect_supportable_shift (code, interm_type))
1385 return false;
1387 break;
1389 default:
1390 gcc_unreachable ();
1393 /* There are four possible cases:
1394 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1395 the first statement in the sequence)
1396 a. The original, HALF_TYPE, is not enough - we replace the promotion
1397 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1398 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1399 promotion.
1400 2. OPRND is defined by a pattern statement we created.
1401 a. Its type is not sufficient for the operation, we create a new stmt:
1402 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1403 this statement in NEW_DEF_STMT, and it is later put in
1404 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1405 b. OPRND is good to use in the new statement. */
1406 if (first)
1408 if (interm_type)
1410 /* Replace the original type conversion HALF_TYPE->TYPE with
1411 HALF_TYPE->INTERM_TYPE. */
1412 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1414 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1415 /* Check if the already created pattern stmt is what we need. */
1416 if (!is_gimple_assign (new_stmt)
1417 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1418 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1419 return false;
1421 stmts->safe_push (def_stmt);
1422 oprnd = gimple_assign_lhs (new_stmt);
1424 else
1426 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1427 oprnd = gimple_assign_rhs1 (def_stmt);
1428 new_oprnd = make_ssa_name (interm_type);
1429 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1430 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1431 stmts->safe_push (def_stmt);
1432 oprnd = new_oprnd;
1435 else
1437 /* Retrieve the operand before the type promotion. */
1438 oprnd = gimple_assign_rhs1 (def_stmt);
1441 else
1443 if (interm_type)
1445 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1446 new_oprnd = make_ssa_name (interm_type);
1447 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1448 oprnd = new_oprnd;
1449 *new_def_stmt = new_stmt;
1452 /* Otherwise, OPRND is already set. */
1455 if (interm_type)
1456 *new_type = interm_type;
1457 else
1458 *new_type = half_type;
1460 *op0 = oprnd;
1461 *op1 = fold_convert (*new_type, const_oprnd);
1463 return true;
1467 /* Try to find a statement or a sequence of statements that can be performed
1468 on a smaller type:
1470 type x_t;
1471 TYPE x_T, res0_T, res1_T;
1472 loop:
1473 S1 x_t = *p;
1474 S2 x_T = (TYPE) x_t;
1475 S3 res0_T = op (x_T, C0);
1476 S4 res1_T = op (res0_T, C1);
1477 S5 ... = () res1_T; - type demotion
1479 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1480 constants.
1481 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1482 be 'type' or some intermediate type. For now, we expect S5 to be a type
1483 demotion operation. We also check that S3 and S4 have only one use. */
1485 static gimple
1486 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1487 tree *type_in, tree *type_out)
1489 gimple stmt = stmts->pop ();
1490 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1491 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1492 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1493 bool first;
1494 tree type = NULL;
1496 first = true;
1497 while (1)
1499 if (!vinfo_for_stmt (stmt)
1500 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1501 return NULL;
1503 new_def_stmt = NULL;
1504 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1505 &op0, &op1, &new_def_stmt,
1506 stmts))
1508 if (first)
1509 return NULL;
1510 else
1511 break;
1514 /* STMT can be performed on a smaller type. Check its uses. */
1515 use_stmt = vect_single_imm_use (stmt);
1516 if (!use_stmt || !is_gimple_assign (use_stmt))
1517 return NULL;
1519 /* Create pattern statement for STMT. */
1520 vectype = get_vectype_for_scalar_type (new_type);
1521 if (!vectype)
1522 return NULL;
1524 /* We want to collect all the statements for which we create pattern
1525 statetments, except for the case when the last statement in the
1526 sequence doesn't have a corresponding pattern statement. In such
1527 case we associate the last pattern statement with the last statement
1528 in the sequence. Therefore, we only add the original statement to
1529 the list if we know that it is not the last. */
1530 if (prev_stmt)
1531 stmts->safe_push (prev_stmt);
1533 var = vect_recog_temp_ssa_var (new_type, NULL);
1534 pattern_stmt
1535 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1536 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1537 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1539 if (dump_enabled_p ())
1541 dump_printf_loc (MSG_NOTE, vect_location,
1542 "created pattern stmt: ");
1543 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1544 dump_printf (MSG_NOTE, "\n");
1547 type = gimple_expr_type (stmt);
1548 prev_stmt = stmt;
1549 stmt = use_stmt;
1551 first = false;
1554 /* We got a sequence. We expect it to end with a type demotion operation.
1555 Otherwise, we quit (for now). There are three possible cases: the
1556 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1557 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1558 NEW_TYPE differs (we create a new conversion statement). */
1559 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1561 use_lhs = gimple_assign_lhs (use_stmt);
1562 use_type = TREE_TYPE (use_lhs);
1563 /* Support only type demotion or signedess change. */
1564 if (!INTEGRAL_TYPE_P (use_type)
1565 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1566 return NULL;
1568 /* Check that NEW_TYPE is not bigger than the conversion result. */
1569 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1570 return NULL;
1572 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1573 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1575 /* Create NEW_TYPE->USE_TYPE conversion. */
1576 new_oprnd = make_ssa_name (use_type);
1577 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1578 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1580 *type_in = get_vectype_for_scalar_type (new_type);
1581 *type_out = get_vectype_for_scalar_type (use_type);
1583 /* We created a pattern statement for the last statement in the
1584 sequence, so we don't need to associate it with the pattern
1585 statement created for PREV_STMT. Therefore, we add PREV_STMT
1586 to the list in order to mark it later in vect_pattern_recog_1. */
1587 if (prev_stmt)
1588 stmts->safe_push (prev_stmt);
1590 else
1592 if (prev_stmt)
1593 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1594 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1596 *type_in = vectype;
1597 *type_out = NULL_TREE;
1600 stmts->safe_push (use_stmt);
1602 else
1603 /* TODO: support general case, create a conversion to the correct type. */
1604 return NULL;
1606 /* Pattern detected. */
1607 if (dump_enabled_p ())
1609 dump_printf_loc (MSG_NOTE, vect_location,
1610 "vect_recog_over_widening_pattern: detected: ");
1611 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1612 dump_printf (MSG_NOTE, "\n");
1615 return pattern_stmt;
1618 /* Detect widening shift pattern:
1620 type a_t;
1621 TYPE a_T, res_T;
1623 S1 a_t = ;
1624 S2 a_T = (TYPE) a_t;
1625 S3 res_T = a_T << CONST;
1627 where type 'TYPE' is at least double the size of type 'type'.
1629 Also detect cases where the shift result is immediately converted
1630 to another type 'result_type' that is no larger in size than 'TYPE'.
1631 In those cases we perform a widen-shift that directly results in
1632 'result_type', to avoid a possible over-widening situation:
1634 type a_t;
1635 TYPE a_T, res_T;
1636 result_type res_result;
1638 S1 a_t = ;
1639 S2 a_T = (TYPE) a_t;
1640 S3 res_T = a_T << CONST;
1641 S4 res_result = (result_type) res_T;
1642 '--> res_result' = a_t w<< CONST;
1644 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1645 create an additional pattern stmt for S2 to create a variable of an
1646 intermediate type, and perform widen-shift on the intermediate type:
1648 type a_t;
1649 interm_type a_it;
1650 TYPE a_T, res_T, res_T';
1652 S1 a_t = ;
1653 S2 a_T = (TYPE) a_t;
1654 '--> a_it = (interm_type) a_t;
1655 S3 res_T = a_T << CONST;
1656 '--> res_T' = a_it <<* CONST;
1658 Input/Output:
1660 * STMTS: Contains a stmt from which the pattern search begins.
1661 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1662 in STMTS. When an intermediate type is used and a pattern statement is
1663 created for S2, we also put S2 here (before S3).
1665 Output:
1667 * TYPE_IN: The type of the input arguments to the pattern.
1669 * TYPE_OUT: The type of the output of this pattern.
1671 * Return value: A new stmt that will be used to replace the sequence of
1672 stmts that constitute the pattern. In this case it will be:
1673 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1675 static gimple
1676 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1677 tree *type_in, tree *type_out)
1679 gimple last_stmt = stmts->pop ();
1680 gimple def_stmt0;
1681 tree oprnd0, oprnd1;
1682 tree type, half_type0;
1683 gimple pattern_stmt;
1684 tree vectype, vectype_out = NULL_TREE;
1685 tree var;
1686 enum tree_code dummy_code;
1687 int dummy_int;
1688 vec<tree> dummy_vec;
1689 gimple use_stmt;
1690 bool promotion;
1692 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1693 return NULL;
1695 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1696 return NULL;
1698 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1699 return NULL;
1701 oprnd0 = gimple_assign_rhs1 (last_stmt);
1702 oprnd1 = gimple_assign_rhs2 (last_stmt);
1703 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1704 return NULL;
1706 /* Check operand 0: it has to be defined by a type promotion. */
1707 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1708 &promotion)
1709 || !promotion)
1710 return NULL;
1712 /* Check operand 1: has to be positive. We check that it fits the type
1713 in vect_handle_widen_op_by_const (). */
1714 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1715 return NULL;
1717 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1718 type = gimple_expr_type (last_stmt);
1720 /* Check for subsequent conversion to another type. */
1721 use_stmt = vect_single_imm_use (last_stmt);
1722 if (use_stmt && is_gimple_assign (use_stmt)
1723 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1724 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1726 tree use_lhs = gimple_assign_lhs (use_stmt);
1727 tree use_type = TREE_TYPE (use_lhs);
1729 if (INTEGRAL_TYPE_P (use_type)
1730 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1732 last_stmt = use_stmt;
1733 type = use_type;
1737 /* Check if this a widening operation. */
1738 gimple wstmt = NULL;
1739 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1740 &oprnd0, &wstmt,
1741 type, &half_type0, def_stmt0))
1742 return NULL;
1744 /* Pattern detected. */
1745 if (dump_enabled_p ())
1746 dump_printf_loc (MSG_NOTE, vect_location,
1747 "vect_recog_widen_shift_pattern: detected:\n");
1749 /* Check target support. */
1750 vectype = get_vectype_for_scalar_type (half_type0);
1751 vectype_out = get_vectype_for_scalar_type (type);
1753 if (!vectype
1754 || !vectype_out
1755 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1756 vectype_out, vectype,
1757 &dummy_code, &dummy_code,
1758 &dummy_int, &dummy_vec))
1759 return NULL;
1761 *type_in = vectype;
1762 *type_out = vectype_out;
1764 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1765 var = vect_recog_temp_ssa_var (type, NULL);
1766 pattern_stmt =
1767 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1768 if (wstmt)
1770 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1771 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1772 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1773 new_pattern_def_seq (stmt_vinfo, wstmt);
1774 stmt_vec_info new_stmt_info
1775 = new_stmt_vec_info (wstmt, loop_vinfo, bb_vinfo);
1776 set_vinfo_for_stmt (wstmt, new_stmt_info);
1777 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1780 if (dump_enabled_p ())
1781 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1783 stmts->safe_push (last_stmt);
1784 return pattern_stmt;
1787 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1789 type a_t, b_t, c_t;
1791 S0 a_t = b_t r<< c_t;
1793 Input/Output:
1795 * STMTS: Contains a stmt from which the pattern search begins,
1796 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1797 with a sequence:
1799 S1 d_t = -c_t;
1800 S2 e_t = d_t & (B - 1);
1801 S3 f_t = b_t << c_t;
1802 S4 g_t = b_t >> e_t;
1803 S0 a_t = f_t | g_t;
1805 where B is element bitsize of type.
1807 Output:
1809 * TYPE_IN: The type of the input arguments to the pattern.
1811 * TYPE_OUT: The type of the output of this pattern.
1813 * Return value: A new stmt that will be used to replace the rotate
1814 S0 stmt. */
1816 static gimple
1817 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1819 gimple last_stmt = stmts->pop ();
1820 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1821 gimple pattern_stmt, def_stmt;
1822 enum tree_code rhs_code;
1823 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1824 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1825 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1826 enum vect_def_type dt;
1827 optab optab1, optab2;
1828 edge ext_def = NULL;
1830 if (!is_gimple_assign (last_stmt))
1831 return NULL;
1833 rhs_code = gimple_assign_rhs_code (last_stmt);
1834 switch (rhs_code)
1836 case LROTATE_EXPR:
1837 case RROTATE_EXPR:
1838 break;
1839 default:
1840 return NULL;
1843 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1844 return NULL;
1846 lhs = gimple_assign_lhs (last_stmt);
1847 oprnd0 = gimple_assign_rhs1 (last_stmt);
1848 type = TREE_TYPE (oprnd0);
1849 oprnd1 = gimple_assign_rhs2 (last_stmt);
1850 if (TREE_CODE (oprnd0) != SSA_NAME
1851 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1852 || !INTEGRAL_TYPE_P (type)
1853 || !TYPE_UNSIGNED (type))
1854 return NULL;
1856 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1857 &def, &dt))
1858 return NULL;
1860 if (dt != vect_internal_def
1861 && dt != vect_constant_def
1862 && dt != vect_external_def)
1863 return NULL;
1865 vectype = get_vectype_for_scalar_type (type);
1866 if (vectype == NULL_TREE)
1867 return NULL;
1869 /* If vector/vector or vector/scalar rotate is supported by the target,
1870 don't do anything here. */
1871 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1872 if (optab1
1873 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1874 return NULL;
1876 if (bb_vinfo != NULL || dt != vect_internal_def)
1878 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1879 if (optab2
1880 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1881 return NULL;
1884 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1885 don't do anything here either. */
1886 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1887 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1888 if (!optab1
1889 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1890 || !optab2
1891 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1893 if (bb_vinfo == NULL && dt == vect_internal_def)
1894 return NULL;
1895 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1896 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1897 if (!optab1
1898 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1899 || !optab2
1900 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1901 return NULL;
1904 *type_in = vectype;
1905 *type_out = vectype;
1906 if (*type_in == NULL_TREE)
1907 return NULL;
1909 if (dt == vect_external_def
1910 && TREE_CODE (oprnd1) == SSA_NAME
1911 && loop_vinfo)
1913 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1914 ext_def = loop_preheader_edge (loop);
1915 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1917 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1918 if (bb == NULL
1919 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1920 ext_def = NULL;
1924 def = NULL_TREE;
1925 if (TREE_CODE (oprnd1) == INTEGER_CST
1926 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1927 def = oprnd1;
1928 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1930 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1931 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1932 && TYPE_PRECISION (TREE_TYPE (rhs1))
1933 == TYPE_PRECISION (type))
1934 def = rhs1;
1937 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1938 if (def == NULL_TREE)
1940 def = vect_recog_temp_ssa_var (type, NULL);
1941 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1942 if (ext_def)
1944 basic_block new_bb
1945 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1946 gcc_assert (!new_bb);
1948 else
1949 append_pattern_def_seq (stmt_vinfo, def_stmt);
1951 stype = TREE_TYPE (def);
1953 if (TREE_CODE (def) == INTEGER_CST)
1955 if (!tree_fits_uhwi_p (def)
1956 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1957 || integer_zerop (def))
1958 return NULL;
1959 def2 = build_int_cst (stype,
1960 GET_MODE_PRECISION (TYPE_MODE (type))
1961 - tree_to_uhwi (def));
1963 else
1965 tree vecstype = get_vectype_for_scalar_type (stype);
1966 stmt_vec_info def_stmt_vinfo;
1968 if (vecstype == NULL_TREE)
1969 return NULL;
1970 def2 = vect_recog_temp_ssa_var (stype, NULL);
1971 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1972 if (ext_def)
1974 basic_block new_bb
1975 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1976 gcc_assert (!new_bb);
1978 else
1980 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1981 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1982 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1983 append_pattern_def_seq (stmt_vinfo, def_stmt);
1986 def2 = vect_recog_temp_ssa_var (stype, NULL);
1987 tree mask
1988 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1989 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1990 gimple_assign_lhs (def_stmt), mask);
1991 if (ext_def)
1993 basic_block new_bb
1994 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1995 gcc_assert (!new_bb);
1997 else
1999 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2000 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2001 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2002 append_pattern_def_seq (stmt_vinfo, def_stmt);
2006 var1 = vect_recog_temp_ssa_var (type, NULL);
2007 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2008 ? LSHIFT_EXPR : RSHIFT_EXPR,
2009 oprnd0, def);
2010 append_pattern_def_seq (stmt_vinfo, def_stmt);
2012 var2 = vect_recog_temp_ssa_var (type, NULL);
2013 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2014 ? RSHIFT_EXPR : LSHIFT_EXPR,
2015 oprnd0, def2);
2016 append_pattern_def_seq (stmt_vinfo, def_stmt);
2018 /* Pattern detected. */
2019 if (dump_enabled_p ())
2020 dump_printf_loc (MSG_NOTE, vect_location,
2021 "vect_recog_rotate_pattern: detected:\n");
2023 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2024 var = vect_recog_temp_ssa_var (type, NULL);
2025 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2027 if (dump_enabled_p ())
2028 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2030 stmts->safe_push (last_stmt);
2031 return pattern_stmt;
2034 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2035 vectorized:
2037 type a_t;
2038 TYPE b_T, res_T;
2040 S1 a_t = ;
2041 S2 b_T = ;
2042 S3 res_T = b_T op a_t;
2044 where type 'TYPE' is a type with different size than 'type',
2045 and op is <<, >> or rotate.
2047 Also detect cases:
2049 type a_t;
2050 TYPE b_T, c_T, res_T;
2052 S0 c_T = ;
2053 S1 a_t = (type) c_T;
2054 S2 b_T = ;
2055 S3 res_T = b_T op a_t;
2057 Input/Output:
2059 * STMTS: Contains a stmt from which the pattern search begins,
2060 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2061 with a shift/rotate which has same type on both operands, in the
2062 second case just b_T op c_T, in the first case with added cast
2063 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2065 Output:
2067 * TYPE_IN: The type of the input arguments to the pattern.
2069 * TYPE_OUT: The type of the output of this pattern.
2071 * Return value: A new stmt that will be used to replace the shift/rotate
2072 S3 stmt. */
2074 static gimple
2075 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
2076 tree *type_in, tree *type_out)
2078 gimple last_stmt = stmts->pop ();
2079 tree oprnd0, oprnd1, lhs, var;
2080 gimple pattern_stmt, def_stmt;
2081 enum tree_code rhs_code;
2082 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2083 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2084 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2085 enum vect_def_type dt;
2086 tree def;
2088 if (!is_gimple_assign (last_stmt))
2089 return NULL;
2091 rhs_code = gimple_assign_rhs_code (last_stmt);
2092 switch (rhs_code)
2094 case LSHIFT_EXPR:
2095 case RSHIFT_EXPR:
2096 case LROTATE_EXPR:
2097 case RROTATE_EXPR:
2098 break;
2099 default:
2100 return NULL;
2103 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2104 return NULL;
2106 lhs = gimple_assign_lhs (last_stmt);
2107 oprnd0 = gimple_assign_rhs1 (last_stmt);
2108 oprnd1 = gimple_assign_rhs2 (last_stmt);
2109 if (TREE_CODE (oprnd0) != SSA_NAME
2110 || TREE_CODE (oprnd1) != SSA_NAME
2111 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2112 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2113 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2114 || TYPE_PRECISION (TREE_TYPE (lhs))
2115 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2116 return NULL;
2118 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
2119 &def, &dt))
2120 return NULL;
2122 if (dt != vect_internal_def)
2123 return NULL;
2125 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2126 *type_out = *type_in;
2127 if (*type_in == NULL_TREE)
2128 return NULL;
2130 def = NULL_TREE;
2131 if (gimple_assign_cast_p (def_stmt))
2133 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2134 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2135 && TYPE_PRECISION (TREE_TYPE (rhs1))
2136 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2137 def = rhs1;
2140 if (def == NULL_TREE)
2142 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2143 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2144 new_pattern_def_seq (stmt_vinfo, def_stmt);
2147 /* Pattern detected. */
2148 if (dump_enabled_p ())
2149 dump_printf_loc (MSG_NOTE, vect_location,
2150 "vect_recog_vector_vector_shift_pattern: detected:\n");
2152 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2153 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2154 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2156 if (dump_enabled_p ())
2157 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2159 stmts->safe_push (last_stmt);
2160 return pattern_stmt;
2163 /* Detect a signed division by a constant that wouldn't be
2164 otherwise vectorized:
2166 type a_t, b_t;
2168 S1 a_t = b_t / N;
2170 where type 'type' is an integral type and N is a constant.
2172 Similarly handle modulo by a constant:
2174 S4 a_t = b_t % N;
2176 Input/Output:
2178 * STMTS: Contains a stmt from which the pattern search begins,
2179 i.e. the division stmt. S1 is replaced by if N is a power
2180 of two constant and type is signed:
2181 S3 y_t = b_t < 0 ? N - 1 : 0;
2182 S2 x_t = b_t + y_t;
2183 S1' a_t = x_t >> log2 (N);
2185 S4 is replaced if N is a power of two constant and
2186 type is signed by (where *_T temporaries have unsigned type):
2187 S9 y_T = b_t < 0 ? -1U : 0U;
2188 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2189 S7 z_t = (type) z_T;
2190 S6 w_t = b_t + z_t;
2191 S5 x_t = w_t & (N - 1);
2192 S4' a_t = x_t - z_t;
2194 Output:
2196 * TYPE_IN: The type of the input arguments to the pattern.
2198 * TYPE_OUT: The type of the output of this pattern.
2200 * Return value: A new stmt that will be used to replace the division
2201 S1 or modulo S4 stmt. */
2203 static gimple
2204 vect_recog_divmod_pattern (vec<gimple> *stmts,
2205 tree *type_in, tree *type_out)
2207 gimple last_stmt = stmts->pop ();
2208 tree oprnd0, oprnd1, vectype, itype, cond;
2209 gimple pattern_stmt, def_stmt;
2210 enum tree_code rhs_code;
2211 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2212 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2213 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2214 optab optab;
2215 tree q;
2216 int dummy_int, prec;
2217 stmt_vec_info def_stmt_vinfo;
2219 if (!is_gimple_assign (last_stmt))
2220 return NULL;
2222 rhs_code = gimple_assign_rhs_code (last_stmt);
2223 switch (rhs_code)
2225 case TRUNC_DIV_EXPR:
2226 case TRUNC_MOD_EXPR:
2227 break;
2228 default:
2229 return NULL;
2232 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2233 return NULL;
2235 oprnd0 = gimple_assign_rhs1 (last_stmt);
2236 oprnd1 = gimple_assign_rhs2 (last_stmt);
2237 itype = TREE_TYPE (oprnd0);
2238 if (TREE_CODE (oprnd0) != SSA_NAME
2239 || TREE_CODE (oprnd1) != INTEGER_CST
2240 || TREE_CODE (itype) != INTEGER_TYPE
2241 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2242 return NULL;
2244 vectype = get_vectype_for_scalar_type (itype);
2245 if (vectype == NULL_TREE)
2246 return NULL;
2248 /* If the target can handle vectorized division or modulo natively,
2249 don't attempt to optimize this. */
2250 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2251 if (optab != unknown_optab)
2253 machine_mode vec_mode = TYPE_MODE (vectype);
2254 int icode = (int) optab_handler (optab, vec_mode);
2255 if (icode != CODE_FOR_nothing)
2256 return NULL;
2259 prec = TYPE_PRECISION (itype);
2260 if (integer_pow2p (oprnd1))
2262 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2263 return NULL;
2265 /* Pattern detected. */
2266 if (dump_enabled_p ())
2267 dump_printf_loc (MSG_NOTE, vect_location,
2268 "vect_recog_divmod_pattern: detected:\n");
2270 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2271 build_int_cst (itype, 0));
2272 if (rhs_code == TRUNC_DIV_EXPR)
2274 tree var = vect_recog_temp_ssa_var (itype, NULL);
2275 tree shift;
2276 def_stmt
2277 = gimple_build_assign (var, COND_EXPR, cond,
2278 fold_build2 (MINUS_EXPR, itype, oprnd1,
2279 build_int_cst (itype, 1)),
2280 build_int_cst (itype, 0));
2281 new_pattern_def_seq (stmt_vinfo, def_stmt);
2282 var = vect_recog_temp_ssa_var (itype, NULL);
2283 def_stmt
2284 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2285 gimple_assign_lhs (def_stmt));
2286 append_pattern_def_seq (stmt_vinfo, def_stmt);
2288 shift = build_int_cst (itype, tree_log2 (oprnd1));
2289 pattern_stmt
2290 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2291 RSHIFT_EXPR, var, shift);
2293 else
2295 tree signmask;
2296 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2297 if (compare_tree_int (oprnd1, 2) == 0)
2299 signmask = vect_recog_temp_ssa_var (itype, NULL);
2300 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2301 build_int_cst (itype, 1),
2302 build_int_cst (itype, 0));
2303 append_pattern_def_seq (stmt_vinfo, def_stmt);
2305 else
2307 tree utype
2308 = build_nonstandard_integer_type (prec, 1);
2309 tree vecutype = get_vectype_for_scalar_type (utype);
2310 tree shift
2311 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2312 - tree_log2 (oprnd1));
2313 tree var = vect_recog_temp_ssa_var (utype, NULL);
2315 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2316 build_int_cst (utype, -1),
2317 build_int_cst (utype, 0));
2318 def_stmt_vinfo
2319 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2320 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2321 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2322 append_pattern_def_seq (stmt_vinfo, def_stmt);
2323 var = vect_recog_temp_ssa_var (utype, NULL);
2324 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2325 gimple_assign_lhs (def_stmt),
2326 shift);
2327 def_stmt_vinfo
2328 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2329 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2330 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2331 append_pattern_def_seq (stmt_vinfo, def_stmt);
2332 signmask = vect_recog_temp_ssa_var (itype, NULL);
2333 def_stmt
2334 = gimple_build_assign (signmask, NOP_EXPR, var);
2335 append_pattern_def_seq (stmt_vinfo, def_stmt);
2337 def_stmt
2338 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2339 PLUS_EXPR, oprnd0, signmask);
2340 append_pattern_def_seq (stmt_vinfo, def_stmt);
2341 def_stmt
2342 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2343 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2344 fold_build2 (MINUS_EXPR, itype, oprnd1,
2345 build_int_cst (itype, 1)));
2346 append_pattern_def_seq (stmt_vinfo, def_stmt);
2348 pattern_stmt
2349 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2350 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2351 signmask);
2354 if (dump_enabled_p ())
2355 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2358 stmts->safe_push (last_stmt);
2360 *type_in = vectype;
2361 *type_out = vectype;
2362 return pattern_stmt;
2365 if (prec > HOST_BITS_PER_WIDE_INT
2366 || integer_zerop (oprnd1))
2367 return NULL;
2369 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2370 return NULL;
2372 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2374 if (TYPE_UNSIGNED (itype))
2376 unsigned HOST_WIDE_INT mh, ml;
2377 int pre_shift, post_shift;
2378 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2379 & GET_MODE_MASK (TYPE_MODE (itype)));
2380 tree t1, t2, t3, t4;
2382 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2383 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2384 return NULL;
2386 /* Find a suitable multiplier and right shift count
2387 instead of multiplying with D. */
2388 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2390 /* If the suggested multiplier is more than SIZE bits, we can do better
2391 for even divisors, using an initial right shift. */
2392 if (mh != 0 && (d & 1) == 0)
2394 pre_shift = floor_log2 (d & -d);
2395 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2396 &ml, &post_shift, &dummy_int);
2397 gcc_assert (!mh);
2399 else
2400 pre_shift = 0;
2402 if (mh != 0)
2404 if (post_shift - 1 >= prec)
2405 return NULL;
2407 /* t1 = oprnd0 h* ml;
2408 t2 = oprnd0 - t1;
2409 t3 = t2 >> 1;
2410 t4 = t1 + t3;
2411 q = t4 >> (post_shift - 1); */
2412 t1 = vect_recog_temp_ssa_var (itype, NULL);
2413 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2414 build_int_cst (itype, ml));
2415 append_pattern_def_seq (stmt_vinfo, def_stmt);
2417 t2 = vect_recog_temp_ssa_var (itype, NULL);
2418 def_stmt
2419 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2420 append_pattern_def_seq (stmt_vinfo, def_stmt);
2422 t3 = vect_recog_temp_ssa_var (itype, NULL);
2423 def_stmt
2424 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2425 append_pattern_def_seq (stmt_vinfo, def_stmt);
2427 t4 = vect_recog_temp_ssa_var (itype, NULL);
2428 def_stmt
2429 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2431 if (post_shift != 1)
2433 append_pattern_def_seq (stmt_vinfo, def_stmt);
2435 q = vect_recog_temp_ssa_var (itype, NULL);
2436 pattern_stmt
2437 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2438 build_int_cst (itype, post_shift - 1));
2440 else
2442 q = t4;
2443 pattern_stmt = def_stmt;
2446 else
2448 if (pre_shift >= prec || post_shift >= prec)
2449 return NULL;
2451 /* t1 = oprnd0 >> pre_shift;
2452 t2 = t1 h* ml;
2453 q = t2 >> post_shift; */
2454 if (pre_shift)
2456 t1 = vect_recog_temp_ssa_var (itype, NULL);
2457 def_stmt
2458 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2459 build_int_cst (NULL, pre_shift));
2460 append_pattern_def_seq (stmt_vinfo, def_stmt);
2462 else
2463 t1 = oprnd0;
2465 t2 = vect_recog_temp_ssa_var (itype, NULL);
2466 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2467 build_int_cst (itype, ml));
2469 if (post_shift)
2471 append_pattern_def_seq (stmt_vinfo, def_stmt);
2473 q = vect_recog_temp_ssa_var (itype, NULL);
2474 def_stmt
2475 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2476 build_int_cst (itype, post_shift));
2478 else
2479 q = t2;
2481 pattern_stmt = def_stmt;
2484 else
2486 unsigned HOST_WIDE_INT ml;
2487 int post_shift;
2488 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2489 unsigned HOST_WIDE_INT abs_d;
2490 bool add = false;
2491 tree t1, t2, t3, t4;
2493 /* Give up for -1. */
2494 if (d == -1)
2495 return NULL;
2497 /* Since d might be INT_MIN, we have to cast to
2498 unsigned HOST_WIDE_INT before negating to avoid
2499 undefined signed overflow. */
2500 abs_d = (d >= 0
2501 ? (unsigned HOST_WIDE_INT) d
2502 : - (unsigned HOST_WIDE_INT) d);
2504 /* n rem d = n rem -d */
2505 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2507 d = abs_d;
2508 oprnd1 = build_int_cst (itype, abs_d);
2510 else if (HOST_BITS_PER_WIDE_INT >= prec
2511 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2512 /* This case is not handled correctly below. */
2513 return NULL;
2515 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2516 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2518 add = true;
2519 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2521 if (post_shift >= prec)
2522 return NULL;
2524 /* t1 = oprnd0 h* ml; */
2525 t1 = vect_recog_temp_ssa_var (itype, NULL);
2526 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2527 build_int_cst (itype, ml));
2529 if (add)
2531 /* t2 = t1 + oprnd0; */
2532 append_pattern_def_seq (stmt_vinfo, def_stmt);
2533 t2 = vect_recog_temp_ssa_var (itype, NULL);
2534 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2536 else
2537 t2 = t1;
2539 if (post_shift)
2541 /* t3 = t2 >> post_shift; */
2542 append_pattern_def_seq (stmt_vinfo, def_stmt);
2543 t3 = vect_recog_temp_ssa_var (itype, NULL);
2544 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2545 build_int_cst (itype, post_shift));
2547 else
2548 t3 = t2;
2550 wide_int oprnd0_min, oprnd0_max;
2551 int msb = 1;
2552 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2554 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2555 msb = 0;
2556 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2557 msb = -1;
2560 if (msb == 0 && d >= 0)
2562 /* q = t3; */
2563 q = t3;
2564 pattern_stmt = def_stmt;
2566 else
2568 /* t4 = oprnd0 >> (prec - 1);
2569 or if we know from VRP that oprnd0 >= 0
2570 t4 = 0;
2571 or if we know from VRP that oprnd0 < 0
2572 t4 = -1; */
2573 append_pattern_def_seq (stmt_vinfo, def_stmt);
2574 t4 = vect_recog_temp_ssa_var (itype, NULL);
2575 if (msb != 1)
2576 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2577 build_int_cst (itype, msb));
2578 else
2579 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2580 build_int_cst (itype, prec - 1));
2581 append_pattern_def_seq (stmt_vinfo, def_stmt);
2583 /* q = t3 - t4; or q = t4 - t3; */
2584 q = vect_recog_temp_ssa_var (itype, NULL);
2585 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2586 d < 0 ? t3 : t4);
2590 if (rhs_code == TRUNC_MOD_EXPR)
2592 tree r, t1;
2594 /* We divided. Now finish by:
2595 t1 = q * oprnd1;
2596 r = oprnd0 - t1; */
2597 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2599 t1 = vect_recog_temp_ssa_var (itype, NULL);
2600 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2601 append_pattern_def_seq (stmt_vinfo, def_stmt);
2603 r = vect_recog_temp_ssa_var (itype, NULL);
2604 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2607 /* Pattern detected. */
2608 if (dump_enabled_p ())
2610 dump_printf_loc (MSG_NOTE, vect_location,
2611 "vect_recog_divmod_pattern: detected: ");
2612 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2613 dump_printf (MSG_NOTE, "\n");
2616 stmts->safe_push (last_stmt);
2618 *type_in = vectype;
2619 *type_out = vectype;
2620 return pattern_stmt;
2623 /* Function vect_recog_mixed_size_cond_pattern
2625 Try to find the following pattern:
2627 type x_t, y_t;
2628 TYPE a_T, b_T, c_T;
2629 loop:
2630 S1 a_T = x_t CMP y_t ? b_T : c_T;
2632 where type 'TYPE' is an integral type which has different size
2633 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2634 than 'type', the constants need to fit into an integer type
2635 with the same width as 'type') or results of conversion from 'type'.
2637 Input:
2639 * LAST_STMT: A stmt from which the pattern search begins.
2641 Output:
2643 * TYPE_IN: The type of the input arguments to the pattern.
2645 * TYPE_OUT: The type of the output of this pattern.
2647 * Return value: A new stmt that will be used to replace the pattern.
2648 Additionally a def_stmt is added.
2650 a_it = x_t CMP y_t ? b_it : c_it;
2651 a_T = (TYPE) a_it; */
2653 static gimple
2654 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2655 tree *type_out)
2657 gimple last_stmt = (*stmts)[0];
2658 tree cond_expr, then_clause, else_clause;
2659 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2660 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2661 machine_mode cmpmode;
2662 gimple pattern_stmt, def_stmt;
2663 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2664 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2665 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2666 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2667 bool promotion;
2668 tree comp_scalar_type;
2670 if (!is_gimple_assign (last_stmt)
2671 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2672 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2673 return NULL;
2675 cond_expr = gimple_assign_rhs1 (last_stmt);
2676 then_clause = gimple_assign_rhs2 (last_stmt);
2677 else_clause = gimple_assign_rhs3 (last_stmt);
2679 if (!COMPARISON_CLASS_P (cond_expr))
2680 return NULL;
2682 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2683 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2684 if (comp_vectype == NULL_TREE)
2685 return NULL;
2687 type = gimple_expr_type (last_stmt);
2688 if (types_compatible_p (type, comp_scalar_type)
2689 || ((TREE_CODE (then_clause) != INTEGER_CST
2690 || TREE_CODE (else_clause) != INTEGER_CST)
2691 && !INTEGRAL_TYPE_P (comp_scalar_type))
2692 || !INTEGRAL_TYPE_P (type))
2693 return NULL;
2695 if ((TREE_CODE (then_clause) != INTEGER_CST
2696 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2697 &def_stmt0, &promotion))
2698 || (TREE_CODE (else_clause) != INTEGER_CST
2699 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2700 &def_stmt1, &promotion)))
2701 return NULL;
2703 if (orig_type0 && orig_type1
2704 && !types_compatible_p (orig_type0, orig_type1))
2705 return NULL;
2707 if (orig_type0)
2709 if (!types_compatible_p (orig_type0, comp_scalar_type))
2710 return NULL;
2711 then_clause = gimple_assign_rhs1 (def_stmt0);
2712 itype = orig_type0;
2715 if (orig_type1)
2717 if (!types_compatible_p (orig_type1, comp_scalar_type))
2718 return NULL;
2719 else_clause = gimple_assign_rhs1 (def_stmt1);
2720 itype = orig_type1;
2723 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2725 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2726 return NULL;
2728 vectype = get_vectype_for_scalar_type (type);
2729 if (vectype == NULL_TREE)
2730 return NULL;
2732 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2733 return NULL;
2735 if (itype == NULL_TREE)
2736 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2737 TYPE_UNSIGNED (type));
2739 if (itype == NULL_TREE
2740 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2741 return NULL;
2743 vecitype = get_vectype_for_scalar_type (itype);
2744 if (vecitype == NULL_TREE)
2745 return NULL;
2747 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2748 return NULL;
2750 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2752 if ((TREE_CODE (then_clause) == INTEGER_CST
2753 && !int_fits_type_p (then_clause, itype))
2754 || (TREE_CODE (else_clause) == INTEGER_CST
2755 && !int_fits_type_p (else_clause, itype)))
2756 return NULL;
2759 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2760 COND_EXPR, unshare_expr (cond_expr),
2761 fold_convert (itype, then_clause),
2762 fold_convert (itype, else_clause));
2763 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2764 NOP_EXPR, gimple_assign_lhs (def_stmt));
2766 new_pattern_def_seq (stmt_vinfo, def_stmt);
2767 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2768 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2769 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2770 *type_in = vecitype;
2771 *type_out = vectype;
2773 if (dump_enabled_p ())
2774 dump_printf_loc (MSG_NOTE, vect_location,
2775 "vect_recog_mixed_size_cond_pattern: detected:\n");
2777 return pattern_stmt;
2781 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2782 true if bool VAR can be optimized that way. */
2784 static bool
2785 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2787 gimple def_stmt;
2788 enum vect_def_type dt;
2789 tree def, rhs1;
2790 enum tree_code rhs_code;
2792 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2793 &dt))
2794 return false;
2796 if (dt != vect_internal_def)
2797 return false;
2799 if (!is_gimple_assign (def_stmt))
2800 return false;
2802 if (!has_single_use (def))
2803 return false;
2805 rhs1 = gimple_assign_rhs1 (def_stmt);
2806 rhs_code = gimple_assign_rhs_code (def_stmt);
2807 switch (rhs_code)
2809 case SSA_NAME:
2810 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2812 CASE_CONVERT:
2813 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2814 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2815 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2816 return false;
2817 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2819 case BIT_NOT_EXPR:
2820 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2822 case BIT_AND_EXPR:
2823 case BIT_IOR_EXPR:
2824 case BIT_XOR_EXPR:
2825 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2826 return false;
2827 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2828 bb_vinfo);
2830 default:
2831 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2833 tree vecitype, comp_vectype;
2835 /* If the comparison can throw, then is_gimple_condexpr will be
2836 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2837 if (stmt_could_throw_p (def_stmt))
2838 return false;
2840 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2841 if (comp_vectype == NULL_TREE)
2842 return false;
2844 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2846 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2847 tree itype
2848 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2849 vecitype = get_vectype_for_scalar_type (itype);
2850 if (vecitype == NULL_TREE)
2851 return false;
2853 else
2854 vecitype = comp_vectype;
2855 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2857 return false;
2862 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2863 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2864 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2866 static tree
2867 adjust_bool_pattern_cast (tree type, tree var)
2869 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2870 gimple cast_stmt, pattern_stmt;
2872 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2873 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2874 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2875 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2876 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2877 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2878 return gimple_assign_lhs (cast_stmt);
2882 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2883 recursively. VAR is an SSA_NAME that should be transformed from bool
2884 to a wider integer type, OUT_TYPE is the desired final integer type of
2885 the whole pattern, TRUEVAL should be NULL unless optimizing
2886 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2887 in the then_clause, STMTS is where statements with added pattern stmts
2888 should be pushed to. */
2890 static tree
2891 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2892 vec<gimple> *stmts)
2894 gimple stmt = SSA_NAME_DEF_STMT (var);
2895 enum tree_code rhs_code, def_rhs_code;
2896 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2897 location_t loc;
2898 gimple pattern_stmt, def_stmt;
2900 rhs1 = gimple_assign_rhs1 (stmt);
2901 rhs2 = gimple_assign_rhs2 (stmt);
2902 rhs_code = gimple_assign_rhs_code (stmt);
2903 loc = gimple_location (stmt);
2904 switch (rhs_code)
2906 case SSA_NAME:
2907 CASE_CONVERT:
2908 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2909 itype = TREE_TYPE (irhs1);
2910 pattern_stmt
2911 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2912 SSA_NAME, irhs1);
2913 break;
2915 case BIT_NOT_EXPR:
2916 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2917 itype = TREE_TYPE (irhs1);
2918 pattern_stmt
2919 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2920 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
2921 break;
2923 case BIT_AND_EXPR:
2924 /* Try to optimize x = y & (a < b ? 1 : 0); into
2925 x = (a < b ? y : 0);
2927 E.g. for:
2928 bool a_b, b_b, c_b;
2929 TYPE d_T;
2931 S1 a_b = x1 CMP1 y1;
2932 S2 b_b = x2 CMP2 y2;
2933 S3 c_b = a_b & b_b;
2934 S4 d_T = (TYPE) c_b;
2936 we would normally emit:
2938 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2939 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2940 S3' c_T = a_T & b_T;
2941 S4' d_T = c_T;
2943 but we can save one stmt by using the
2944 result of one of the COND_EXPRs in the other COND_EXPR and leave
2945 BIT_AND_EXPR stmt out:
2947 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2948 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2949 S4' f_T = c_T;
2951 At least when VEC_COND_EXPR is implemented using masks
2952 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2953 computes the comparison masks and ands it, in one case with
2954 all ones vector, in the other case with a vector register.
2955 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2956 often more expensive. */
2957 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2958 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2959 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2961 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2962 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2963 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2964 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2966 gimple tstmt;
2967 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2968 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2969 tstmt = stmts->pop ();
2970 gcc_assert (tstmt == def_stmt);
2971 stmts->quick_push (stmt);
2972 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2973 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2974 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2975 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2976 return irhs2;
2978 else
2979 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2980 goto and_ior_xor;
2982 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2983 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2984 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2986 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2987 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2988 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2989 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2991 gimple tstmt;
2992 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2993 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2994 tstmt = stmts->pop ();
2995 gcc_assert (tstmt == def_stmt);
2996 stmts->quick_push (stmt);
2997 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2998 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2999 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3000 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3001 return irhs1;
3003 else
3004 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3005 goto and_ior_xor;
3007 /* FALLTHRU */
3008 case BIT_IOR_EXPR:
3009 case BIT_XOR_EXPR:
3010 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3011 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3012 and_ior_xor:
3013 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3014 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3016 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3017 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3018 int out_prec = TYPE_PRECISION (out_type);
3019 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3020 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3021 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3022 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3023 else
3025 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3026 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3029 itype = TREE_TYPE (irhs1);
3030 pattern_stmt
3031 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3032 rhs_code, irhs1, irhs2);
3033 break;
3035 default:
3036 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3037 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3038 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3039 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3040 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3042 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3043 itype
3044 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3046 else
3047 itype = TREE_TYPE (rhs1);
3048 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3049 if (trueval == NULL_TREE)
3050 trueval = build_int_cst (itype, 1);
3051 else
3052 gcc_checking_assert (useless_type_conversion_p (itype,
3053 TREE_TYPE (trueval)));
3054 pattern_stmt
3055 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3056 COND_EXPR, cond_expr, trueval,
3057 build_int_cst (itype, 0));
3058 break;
3061 stmts->safe_push (stmt);
3062 gimple_set_location (pattern_stmt, loc);
3063 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3064 return gimple_assign_lhs (pattern_stmt);
3068 /* Function vect_recog_bool_pattern
3070 Try to find pattern like following:
3072 bool a_b, b_b, c_b, d_b, e_b;
3073 TYPE f_T;
3074 loop:
3075 S1 a_b = x1 CMP1 y1;
3076 S2 b_b = x2 CMP2 y2;
3077 S3 c_b = a_b & b_b;
3078 S4 d_b = x3 CMP3 y3;
3079 S5 e_b = c_b | d_b;
3080 S6 f_T = (TYPE) e_b;
3082 where type 'TYPE' is an integral type. Or a similar pattern
3083 ending in
3085 S6 f_Y = e_b ? r_Y : s_Y;
3087 as results from if-conversion of a complex condition.
3089 Input:
3091 * LAST_STMT: A stmt at the end from which the pattern
3092 search begins, i.e. cast of a bool to
3093 an integer type.
3095 Output:
3097 * TYPE_IN: The type of the input arguments to the pattern.
3099 * TYPE_OUT: The type of the output of this pattern.
3101 * Return value: A new stmt that will be used to replace the pattern.
3103 Assuming size of TYPE is the same as size of all comparisons
3104 (otherwise some casts would be added where needed), the above
3105 sequence we create related pattern stmts:
3106 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3107 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3108 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3109 S5' e_T = c_T | d_T;
3110 S6' f_T = e_T;
3112 Instead of the above S3' we could emit:
3113 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3114 S3' c_T = a_T | b_T;
3115 but the above is more efficient. */
3117 static gimple
3118 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
3119 tree *type_out)
3121 gimple last_stmt = stmts->pop ();
3122 enum tree_code rhs_code;
3123 tree var, lhs, rhs, vectype;
3124 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3125 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3126 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
3127 gimple pattern_stmt;
3129 if (!is_gimple_assign (last_stmt))
3130 return NULL;
3132 var = gimple_assign_rhs1 (last_stmt);
3133 lhs = gimple_assign_lhs (last_stmt);
3135 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3136 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3137 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3138 return NULL;
3140 rhs_code = gimple_assign_rhs_code (last_stmt);
3141 if (CONVERT_EXPR_CODE_P (rhs_code))
3143 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3144 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3145 return NULL;
3146 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3147 if (vectype == NULL_TREE)
3148 return NULL;
3150 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3151 return NULL;
3153 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3154 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3155 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3156 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3157 else
3158 pattern_stmt
3159 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3160 *type_out = vectype;
3161 *type_in = vectype;
3162 stmts->safe_push (last_stmt);
3163 if (dump_enabled_p ())
3164 dump_printf_loc (MSG_NOTE, vect_location,
3165 "vect_recog_bool_pattern: detected:\n");
3167 return pattern_stmt;
3169 else if (rhs_code == COND_EXPR
3170 && TREE_CODE (var) == SSA_NAME)
3172 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3173 if (vectype == NULL_TREE)
3174 return NULL;
3176 /* Build a scalar type for the boolean result that when
3177 vectorized matches the vector type of the result in
3178 size and number of elements. */
3179 unsigned prec
3180 = wi::udiv_trunc (TYPE_SIZE (vectype),
3181 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3182 tree type
3183 = build_nonstandard_integer_type (prec,
3184 TYPE_UNSIGNED (TREE_TYPE (var)));
3185 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3186 return NULL;
3188 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3189 return NULL;
3191 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3192 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3193 pattern_stmt
3194 = gimple_build_assign (lhs, COND_EXPR,
3195 build2 (NE_EXPR, boolean_type_node,
3196 rhs, build_int_cst (type, 0)),
3197 gimple_assign_rhs2 (last_stmt),
3198 gimple_assign_rhs3 (last_stmt));
3199 *type_out = vectype;
3200 *type_in = vectype;
3201 stmts->safe_push (last_stmt);
3202 if (dump_enabled_p ())
3203 dump_printf_loc (MSG_NOTE, vect_location,
3204 "vect_recog_bool_pattern: detected:\n");
3206 return pattern_stmt;
3208 else if (rhs_code == SSA_NAME
3209 && STMT_VINFO_DATA_REF (stmt_vinfo))
3211 stmt_vec_info pattern_stmt_info;
3212 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3213 gcc_assert (vectype != NULL_TREE);
3214 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3215 return NULL;
3216 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3217 return NULL;
3219 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3220 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3221 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3223 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3224 gimple cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3225 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3226 rhs = rhs2;
3228 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3229 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3230 bb_vinfo);
3231 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3232 STMT_VINFO_DATA_REF (pattern_stmt_info)
3233 = STMT_VINFO_DATA_REF (stmt_vinfo);
3234 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3235 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3236 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3237 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3238 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3239 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3240 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3241 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3242 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3243 *type_out = vectype;
3244 *type_in = vectype;
3245 stmts->safe_push (last_stmt);
3246 if (dump_enabled_p ())
3247 dump_printf_loc (MSG_NOTE, vect_location,
3248 "vect_recog_bool_pattern: detected:\n");
3249 return pattern_stmt;
3251 else
3252 return NULL;
3256 /* Mark statements that are involved in a pattern. */
3258 static inline void
3259 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3260 tree pattern_vectype)
3262 stmt_vec_info pattern_stmt_info, def_stmt_info;
3263 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3264 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
3265 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
3266 gimple def_stmt;
3268 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3269 if (pattern_stmt_info == NULL)
3271 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3272 bb_vinfo);
3273 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3275 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3277 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3278 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3279 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3280 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3281 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3282 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3283 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3284 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3285 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3287 gimple_stmt_iterator si;
3288 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3289 !gsi_end_p (si); gsi_next (&si))
3291 def_stmt = gsi_stmt (si);
3292 def_stmt_info = vinfo_for_stmt (def_stmt);
3293 if (def_stmt_info == NULL)
3295 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3296 bb_vinfo);
3297 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3299 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3300 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3301 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3302 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3303 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3308 /* Function vect_pattern_recog_1
3310 Input:
3311 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3312 computation pattern.
3313 STMT: A stmt from which the pattern search should start.
3315 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3316 expression that computes the same functionality and can be used to
3317 replace the sequence of stmts that are involved in the pattern.
3319 Output:
3320 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3321 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3322 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3323 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3324 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3325 to the available target pattern.
3327 This function also does some bookkeeping, as explained in the documentation
3328 for vect_recog_pattern. */
3330 static void
3331 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3332 gimple_stmt_iterator si,
3333 vec<gimple> *stmts_to_replace)
3335 gimple stmt = gsi_stmt (si), pattern_stmt;
3336 stmt_vec_info stmt_info;
3337 loop_vec_info loop_vinfo;
3338 tree pattern_vectype;
3339 tree type_in, type_out;
3340 enum tree_code code;
3341 int i;
3342 gimple next;
3344 stmts_to_replace->truncate (0);
3345 stmts_to_replace->quick_push (stmt);
3346 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3347 if (!pattern_stmt)
3348 return;
3350 stmt = stmts_to_replace->last ();
3351 stmt_info = vinfo_for_stmt (stmt);
3352 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3354 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3356 /* No need to check target support (already checked by the pattern
3357 recognition function). */
3358 pattern_vectype = type_out ? type_out : type_in;
3360 else
3362 machine_mode vec_mode;
3363 enum insn_code icode;
3364 optab optab;
3366 /* Check target support */
3367 type_in = get_vectype_for_scalar_type (type_in);
3368 if (!type_in)
3369 return;
3370 if (type_out)
3371 type_out = get_vectype_for_scalar_type (type_out);
3372 else
3373 type_out = type_in;
3374 if (!type_out)
3375 return;
3376 pattern_vectype = type_out;
3378 if (is_gimple_assign (pattern_stmt))
3379 code = gimple_assign_rhs_code (pattern_stmt);
3380 else
3382 gcc_assert (is_gimple_call (pattern_stmt));
3383 code = CALL_EXPR;
3386 optab = optab_for_tree_code (code, type_in, optab_default);
3387 vec_mode = TYPE_MODE (type_in);
3388 if (!optab
3389 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3390 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3391 return;
3394 /* Found a vectorizable pattern. */
3395 if (dump_enabled_p ())
3397 dump_printf_loc (MSG_NOTE, vect_location,
3398 "pattern recognized: ");
3399 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3402 /* Mark the stmts that are involved in the pattern. */
3403 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3405 /* Patterns cannot be vectorized using SLP, because they change the order of
3406 computation. */
3407 if (loop_vinfo)
3408 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3409 if (next == stmt)
3410 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3412 /* It is possible that additional pattern stmts are created and inserted in
3413 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3414 relevant statements. */
3415 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3416 && (unsigned) i < (stmts_to_replace->length () - 1);
3417 i++)
3419 stmt_info = vinfo_for_stmt (stmt);
3420 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3421 if (dump_enabled_p ())
3423 dump_printf_loc (MSG_NOTE, vect_location,
3424 "additional pattern stmt: ");
3425 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3428 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3433 /* Function vect_pattern_recog
3435 Input:
3436 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3437 computation idioms.
3439 Output - for each computation idiom that is detected we create a new stmt
3440 that provides the same functionality and that can be vectorized. We
3441 also record some information in the struct_stmt_info of the relevant
3442 stmts, as explained below:
3444 At the entry to this function we have the following stmts, with the
3445 following initial value in the STMT_VINFO fields:
3447 stmt in_pattern_p related_stmt vec_stmt
3448 S1: a_i = .... - - -
3449 S2: a_2 = ..use(a_i).. - - -
3450 S3: a_1 = ..use(a_2).. - - -
3451 S4: a_0 = ..use(a_1).. - - -
3452 S5: ... = ..use(a_0).. - - -
3454 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3455 represented by a single stmt. We then:
3456 - create a new stmt S6 equivalent to the pattern (the stmt is not
3457 inserted into the code)
3458 - fill in the STMT_VINFO fields as follows:
3460 in_pattern_p related_stmt vec_stmt
3461 S1: a_i = .... - - -
3462 S2: a_2 = ..use(a_i).. - - -
3463 S3: a_1 = ..use(a_2).. - - -
3464 S4: a_0 = ..use(a_1).. true S6 -
3465 '---> S6: a_new = .... - S4 -
3466 S5: ... = ..use(a_0).. - - -
3468 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3469 to each other through the RELATED_STMT field).
3471 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3472 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3473 remain irrelevant unless used by stmts other than S4.
3475 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3476 (because they are marked as irrelevant). It will vectorize S6, and record
3477 a pointer to the new vector stmt VS6 from S6 (as usual).
3478 S4 will be skipped, and S5 will be vectorized as usual:
3480 in_pattern_p related_stmt vec_stmt
3481 S1: a_i = .... - - -
3482 S2: a_2 = ..use(a_i).. - - -
3483 S3: a_1 = ..use(a_2).. - - -
3484 > VS6: va_new = .... - - -
3485 S4: a_0 = ..use(a_1).. true S6 VS6
3486 '---> S6: a_new = .... - S4 VS6
3487 > VS5: ... = ..vuse(va_new).. - - -
3488 S5: ... = ..use(a_0).. - - -
3490 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3491 elsewhere), and we'll end up with:
3493 VS6: va_new = ....
3494 VS5: ... = ..vuse(va_new)..
3496 In case of more than one pattern statements, e.g., widen-mult with
3497 intermediate type:
3499 S1 a_t = ;
3500 S2 a_T = (TYPE) a_t;
3501 '--> S3: a_it = (interm_type) a_t;
3502 S4 prod_T = a_T * CONST;
3503 '--> S5: prod_T' = a_it w* CONST;
3505 there may be other users of a_T outside the pattern. In that case S2 will
3506 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3507 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3508 be recorded in S3. */
3510 void
3511 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3513 struct loop *loop;
3514 basic_block *bbs;
3515 unsigned int nbbs;
3516 gimple_stmt_iterator si;
3517 unsigned int i, j;
3518 vect_recog_func_ptr vect_recog_func;
3519 auto_vec<gimple, 1> stmts_to_replace;
3520 gimple stmt;
3522 if (dump_enabled_p ())
3523 dump_printf_loc (MSG_NOTE, vect_location,
3524 "=== vect_pattern_recog ===\n");
3526 if (loop_vinfo)
3528 loop = LOOP_VINFO_LOOP (loop_vinfo);
3529 bbs = LOOP_VINFO_BBS (loop_vinfo);
3530 nbbs = loop->num_nodes;
3532 else
3534 bbs = &BB_VINFO_BB (bb_vinfo);
3535 nbbs = 1;
3538 /* Scan through the loop stmts, applying the pattern recognition
3539 functions starting at each stmt visited: */
3540 for (i = 0; i < nbbs; i++)
3542 basic_block bb = bbs[i];
3543 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3545 if (bb_vinfo && (stmt = gsi_stmt (si))
3546 && vinfo_for_stmt (stmt)
3547 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3548 continue;
3550 /* Scan over all generic vect_recog_xxx_pattern functions. */
3551 for (j = 0; j < NUM_PATTERNS; j++)
3553 vect_recog_func = vect_vect_recog_func_ptrs[j];
3554 vect_pattern_recog_1 (vect_recog_func, si,
3555 &stmts_to_replace);