/* gcc/tree-vect-patterns.c
   (from official-gcc.git, blob d06d69666aefc8adf55c402d7aa0c1bd16fd8739)  */
/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "cfgloop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
#include "builtins.h"
83 /* Pattern recognition functions */
84 static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
85 tree *);
86 static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
87 tree *);
88 static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
89 tree *);
90 static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
91 tree *);
92 static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
93 static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
94 tree *);
95 static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
96 tree *, tree *);
97 static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
98 static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
99 tree *, tree *);
100 static gimple vect_recog_divmod_pattern (vec<gimple> *,
101 tree *, tree *);
102 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
103 tree *, tree *);
104 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
105 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
106 vect_recog_widen_mult_pattern,
107 vect_recog_widen_sum_pattern,
108 vect_recog_dot_prod_pattern,
109 vect_recog_sad_pattern,
110 vect_recog_pow_pattern,
111 vect_recog_widen_shift_pattern,
112 vect_recog_over_widening_pattern,
113 vect_recog_rotate_pattern,
114 vect_recog_vector_vector_shift_pattern,
115 vect_recog_divmod_pattern,
116 vect_recog_mixed_size_cond_pattern,
117 vect_recog_bool_pattern};
119 static inline void
120 append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
122 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
123 stmt);
126 static inline void
127 new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
129 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
130 append_pattern_def_seq (stmt_info, stmt);
133 /* Check whether STMT2 is in the same loop or basic block as STMT1.
134 Which of the two applies depends on whether we're currently doing
135 loop-based or basic-block-based vectorization, as determined by
136 the vinfo_for_stmt for STMT1 (which must be defined).
138 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
139 to be defined as well. */
141 static bool
142 vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
144 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
145 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
146 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
148 if (!gimple_bb (stmt2))
149 return false;
151 if (loop_vinfo)
153 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
154 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
155 return false;
157 else
159 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
160 || gimple_code (stmt2) == GIMPLE_PHI)
161 return false;
164 gcc_assert (vinfo_for_stmt (stmt2));
165 return true;
168 /* If the LHS of DEF_STMT has a single use, and that statement is
169 in the same loop or basic block, return it. */
171 static gimple
172 vect_single_imm_use (gimple def_stmt)
174 tree lhs = gimple_assign_lhs (def_stmt);
175 use_operand_p use_p;
176 gimple use_stmt;
178 if (!single_imm_use (lhs, &use_p, &use_stmt))
179 return NULL;
181 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
182 return NULL;
184 return use_stmt;
187 /* Check whether NAME, an ssa-name used in USE_STMT,
188 is a result of a type promotion, such that:
189 DEF_STMT: NAME = NOP (name0)
190 If CHECK_SIGN is TRUE, check that either both types are signed or both are
191 unsigned. */
193 static bool
194 type_conversion_p (tree name, gimple use_stmt, bool check_sign,
195 tree *orig_type, gimple *def_stmt, bool *promotion)
197 tree dummy;
198 gimple dummy_gimple;
199 loop_vec_info loop_vinfo;
200 stmt_vec_info stmt_vinfo;
201 tree type = TREE_TYPE (name);
202 tree oprnd0;
203 enum vect_def_type dt;
204 tree def;
205 bb_vec_info bb_vinfo;
207 stmt_vinfo = vinfo_for_stmt (use_stmt);
208 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
209 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
210 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
211 &def, &dt))
212 return false;
214 if (dt != vect_internal_def
215 && dt != vect_external_def && dt != vect_constant_def)
216 return false;
218 if (!*def_stmt)
219 return false;
221 if (!is_gimple_assign (*def_stmt))
222 return false;
224 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
225 return false;
227 oprnd0 = gimple_assign_rhs1 (*def_stmt);
229 *orig_type = TREE_TYPE (oprnd0);
230 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
231 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
232 return false;
234 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
235 *promotion = true;
236 else
237 *promotion = false;
239 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
240 bb_vinfo, &dummy_gimple, &dummy, &dt))
241 return false;
243 return true;
246 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
247 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
249 static tree
250 vect_recog_temp_ssa_var (tree type, gimple stmt)
252 return make_temp_ssa_name (type, stmt, "patt");
255 /* Function vect_recog_dot_prod_pattern
257 Try to find the following pattern:
259 type x_t, y_t;
260 TYPE1 prod;
261 TYPE2 sum = init;
262 loop:
263 sum_0 = phi <init, sum_1>
264 S1 x_t = ...
265 S2 y_t = ...
266 S3 x_T = (TYPE1) x_t;
267 S4 y_T = (TYPE1) y_t;
268 S5 prod = x_T * y_T;
269 [S6 prod = (TYPE2) prod; #optional]
270 S7 sum_1 = prod + sum_0;
272 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
273 same size of 'TYPE1' or bigger. This is a special case of a reduction
274 computation.
276 Input:
278 * STMTS: Contains a stmt from which the pattern search begins. In the
279 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
280 will be detected.
282 Output:
284 * TYPE_IN: The type of the input arguments to the pattern.
286 * TYPE_OUT: The type of the output of this pattern.
288 * Return value: A new stmt that will be used to replace the sequence of
289 stmts that constitute the pattern. In this case it will be:
290 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
292 Note: The dot-prod idiom is a widening reduction pattern that is
293 vectorized without preserving all the intermediate results. It
294 produces only N/2 (widened) results (by summing up pairs of
295 intermediate results) rather than all N results. Therefore, we
296 cannot allow this pattern when we want to get all the results and in
297 the correct order (as is the case when this computation is in an
298 inner-loop nested in an outer-loop that us being vectorized). */
300 static gimple
301 vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
302 tree *type_out)
304 gimple stmt, last_stmt = (*stmts)[0];
305 tree oprnd0, oprnd1;
306 tree oprnd00, oprnd01;
307 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
308 tree type, half_type;
309 gimple pattern_stmt;
310 tree prod_type;
311 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
312 struct loop *loop;
313 tree var;
314 bool promotion;
316 if (!loop_info)
317 return NULL;
319 loop = LOOP_VINFO_LOOP (loop_info);
321 if (!is_gimple_assign (last_stmt))
322 return NULL;
324 type = gimple_expr_type (last_stmt);
326 /* Look for the following pattern
327 DX = (TYPE1) X;
328 DY = (TYPE1) Y;
329 DPROD = DX * DY;
330 DDPROD = (TYPE2) DPROD;
331 sum_1 = DDPROD + sum_0;
332 In which
333 - DX is double the size of X
334 - DY is double the size of Y
335 - DX, DY, DPROD all have the same type
336 - sum is the same size of DPROD or bigger
337 - sum has been recognized as a reduction variable.
339 This is equivalent to:
340 DPROD = X w* Y; #widen mult
341 sum_1 = DPROD w+ sum_0; #widen summation
343 DPROD = X w* Y; #widen mult
344 sum_1 = DPROD + sum_0; #summation
347 /* Starting from LAST_STMT, follow the defs of its uses in search
348 of the above pattern. */
350 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
351 return NULL;
353 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
355 /* Has been detected as widening-summation? */
357 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
358 type = gimple_expr_type (stmt);
359 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
360 return NULL;
361 oprnd0 = gimple_assign_rhs1 (stmt);
362 oprnd1 = gimple_assign_rhs2 (stmt);
363 half_type = TREE_TYPE (oprnd0);
365 else
367 gimple def_stmt;
369 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
370 return NULL;
371 oprnd0 = gimple_assign_rhs1 (last_stmt);
372 oprnd1 = gimple_assign_rhs2 (last_stmt);
373 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
374 || !types_compatible_p (TREE_TYPE (oprnd1), type))
375 return NULL;
376 stmt = last_stmt;
378 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
379 &promotion)
380 && promotion)
382 stmt = def_stmt;
383 oprnd0 = gimple_assign_rhs1 (stmt);
385 else
386 half_type = type;
389 /* So far so good. Since last_stmt was detected as a (summation) reduction,
390 we know that oprnd1 is the reduction variable (defined by a loop-header
391 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
392 Left to check that oprnd0 is defined by a (widen_)mult_expr */
393 if (TREE_CODE (oprnd0) != SSA_NAME)
394 return NULL;
396 prod_type = half_type;
397 stmt = SSA_NAME_DEF_STMT (oprnd0);
399 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
400 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
401 return NULL;
403 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
404 inside the loop (in case we are analyzing an outer-loop). */
405 if (!is_gimple_assign (stmt))
406 return NULL;
407 stmt_vinfo = vinfo_for_stmt (stmt);
408 gcc_assert (stmt_vinfo);
409 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
410 return NULL;
411 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
412 return NULL;
413 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
415 /* Has been detected as a widening multiplication? */
417 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
418 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
419 return NULL;
420 stmt_vinfo = vinfo_for_stmt (stmt);
421 gcc_assert (stmt_vinfo);
422 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
423 oprnd00 = gimple_assign_rhs1 (stmt);
424 oprnd01 = gimple_assign_rhs2 (stmt);
425 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
426 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
428 else
430 tree half_type0, half_type1;
431 gimple def_stmt;
432 tree oprnd0, oprnd1;
434 oprnd0 = gimple_assign_rhs1 (stmt);
435 oprnd1 = gimple_assign_rhs2 (stmt);
436 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
437 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
438 return NULL;
439 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
440 &promotion)
441 || !promotion)
442 return NULL;
443 oprnd00 = gimple_assign_rhs1 (def_stmt);
444 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
445 &promotion)
446 || !promotion)
447 return NULL;
448 oprnd01 = gimple_assign_rhs1 (def_stmt);
449 if (!types_compatible_p (half_type0, half_type1))
450 return NULL;
451 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
452 return NULL;
455 half_type = TREE_TYPE (oprnd00);
456 *type_in = half_type;
457 *type_out = type;
459 /* Pattern detected. Create a stmt to be used to replace the pattern: */
460 var = vect_recog_temp_ssa_var (type, NULL);
461 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
462 oprnd00, oprnd01, oprnd1);
464 if (dump_enabled_p ())
466 dump_printf_loc (MSG_NOTE, vect_location,
467 "vect_recog_dot_prod_pattern: detected: ");
468 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
469 dump_printf (MSG_NOTE, "\n");
472 /* We don't allow changing the order of the computation in the inner-loop
473 when doing outer-loop vectorization. */
474 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
476 return pattern_stmt;
480 /* Function vect_recog_sad_pattern
482 Try to find the following Sum of Absolute Difference (SAD) pattern:
484 type x_t, y_t;
485 signed TYPE1 diff, abs_diff;
486 TYPE2 sum = init;
487 loop:
488 sum_0 = phi <init, sum_1>
489 S1 x_t = ...
490 S2 y_t = ...
491 S3 x_T = (TYPE1) x_t;
492 S4 y_T = (TYPE1) y_t;
493 S5 diff = x_T - y_T;
494 S6 abs_diff = ABS_EXPR <diff>;
495 [S7 abs_diff = (TYPE2) abs_diff; #optional]
496 S8 sum_1 = abs_diff + sum_0;
498 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
499 same size of 'TYPE1' or bigger. This is a special case of a reduction
500 computation.
502 Input:
504 * STMTS: Contains a stmt from which the pattern search begins. In the
505 example, when this function is called with S8, the pattern
506 {S3,S4,S5,S6,S7,S8} will be detected.
508 Output:
510 * TYPE_IN: The type of the input arguments to the pattern.
512 * TYPE_OUT: The type of the output of this pattern.
514 * Return value: A new stmt that will be used to replace the sequence of
515 stmts that constitute the pattern. In this case it will be:
516 SAD_EXPR <x_t, y_t, sum_0>
519 static gimple
520 vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
521 tree *type_out)
523 gimple last_stmt = (*stmts)[0];
524 tree sad_oprnd0, sad_oprnd1;
525 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
526 tree half_type;
527 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
528 struct loop *loop;
529 bool promotion;
531 if (!loop_info)
532 return NULL;
534 loop = LOOP_VINFO_LOOP (loop_info);
536 if (!is_gimple_assign (last_stmt))
537 return NULL;
539 tree sum_type = gimple_expr_type (last_stmt);
541 /* Look for the following pattern
542 DX = (TYPE1) X;
543 DY = (TYPE1) Y;
544 DDIFF = DX - DY;
545 DAD = ABS_EXPR <DDIFF>;
546 DDPROD = (TYPE2) DPROD;
547 sum_1 = DAD + sum_0;
548 In which
549 - DX is at least double the size of X
550 - DY is at least double the size of Y
551 - DX, DY, DDIFF, DAD all have the same type
552 - sum is the same size of DAD or bigger
553 - sum has been recognized as a reduction variable.
555 This is equivalent to:
556 DDIFF = X w- Y; #widen sub
557 DAD = ABS_EXPR <DDIFF>;
558 sum_1 = DAD w+ sum_0; #widen summation
560 DDIFF = X w- Y; #widen sub
561 DAD = ABS_EXPR <DDIFF>;
562 sum_1 = DAD + sum_0; #summation
565 /* Starting from LAST_STMT, follow the defs of its uses in search
566 of the above pattern. */
568 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
569 return NULL;
571 tree plus_oprnd0, plus_oprnd1;
573 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
575 /* Has been detected as widening-summation? */
577 gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
578 sum_type = gimple_expr_type (stmt);
579 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
580 return NULL;
581 plus_oprnd0 = gimple_assign_rhs1 (stmt);
582 plus_oprnd1 = gimple_assign_rhs2 (stmt);
583 half_type = TREE_TYPE (plus_oprnd0);
585 else
587 gimple def_stmt;
589 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
590 return NULL;
591 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
592 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
593 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
594 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
595 return NULL;
597 /* The type conversion could be promotion, demotion,
598 or just signed -> unsigned. */
599 if (type_conversion_p (plus_oprnd0, last_stmt, false,
600 &half_type, &def_stmt, &promotion))
601 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
602 else
603 half_type = sum_type;
606 /* So far so good. Since last_stmt was detected as a (summation) reduction,
607 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
608 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
609 Then check that plus_oprnd0 is defined by an abs_expr. */
611 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
612 return NULL;
614 tree abs_type = half_type;
615 gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
617 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
618 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
619 return NULL;
621 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
622 inside the loop (in case we are analyzing an outer-loop). */
623 if (!is_gimple_assign (abs_stmt))
624 return NULL;
626 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
627 gcc_assert (abs_stmt_vinfo);
628 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
629 return NULL;
630 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
631 return NULL;
633 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
634 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
635 return NULL;
636 if (TYPE_UNSIGNED (abs_type))
637 return NULL;
639 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
641 if (TREE_CODE (abs_oprnd) != SSA_NAME)
642 return NULL;
644 gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
646 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
647 if (!gimple_bb (diff_stmt)
648 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
649 return NULL;
651 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
652 inside the loop (in case we are analyzing an outer-loop). */
653 if (!is_gimple_assign (diff_stmt))
654 return NULL;
656 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
657 gcc_assert (diff_stmt_vinfo);
658 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
659 return NULL;
660 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
661 return NULL;
663 tree half_type0, half_type1;
664 gimple def_stmt;
666 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
667 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
669 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
670 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
671 return NULL;
672 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
673 &half_type0, &def_stmt, &promotion)
674 || !promotion)
675 return NULL;
676 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
678 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
679 &half_type1, &def_stmt, &promotion)
680 || !promotion)
681 return NULL;
682 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
684 if (!types_compatible_p (half_type0, half_type1))
685 return NULL;
686 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
687 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
688 return NULL;
690 *type_in = TREE_TYPE (sad_oprnd0);
691 *type_out = sum_type;
693 /* Pattern detected. Create a stmt to be used to replace the pattern: */
694 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
695 gimple pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
696 sad_oprnd1, plus_oprnd1);
698 if (dump_enabled_p ())
700 dump_printf_loc (MSG_NOTE, vect_location,
701 "vect_recog_sad_pattern: detected: ");
702 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
703 dump_printf (MSG_NOTE, "\n");
706 /* We don't allow changing the order of the computation in the inner-loop
707 when doing outer-loop vectorization. */
708 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
710 return pattern_stmt;
714 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
715 and LSHIFT_EXPR.
717 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
718 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
720 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
721 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
722 that satisfies the above restrictions, we can perform a widening opeartion
723 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
724 with a_it = (interm_type) a_t; */
726 static bool
727 vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
728 tree const_oprnd, tree *oprnd,
729 vec<gimple> *stmts, tree type,
730 tree *half_type, gimple def_stmt)
732 tree new_type, new_oprnd;
733 gimple new_stmt;
735 if (code != MULT_EXPR && code != LSHIFT_EXPR)
736 return false;
738 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
739 || (code == LSHIFT_EXPR
740 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
741 != 1))
742 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
744 /* CONST_OPRND is a constant of HALF_TYPE. */
745 *oprnd = gimple_assign_rhs1 (def_stmt);
746 return true;
749 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
750 return false;
752 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
753 return false;
755 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
756 a type 2 times bigger than HALF_TYPE. */
757 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
758 TYPE_UNSIGNED (type));
759 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
760 || (code == LSHIFT_EXPR
761 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
762 return false;
764 /* Use NEW_TYPE for widening operation. */
765 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
767 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
768 /* Check if the already created pattern stmt is what we need. */
769 if (!is_gimple_assign (new_stmt)
770 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
771 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
772 return false;
774 stmts->safe_push (def_stmt);
775 *oprnd = gimple_assign_lhs (new_stmt);
777 else
779 /* Create a_T = (NEW_TYPE) a_t; */
780 *oprnd = gimple_assign_rhs1 (def_stmt);
781 new_oprnd = make_ssa_name (new_type);
782 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
783 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
784 stmts->safe_push (def_stmt);
785 *oprnd = new_oprnd;
788 *half_type = new_type;
789 return true;
793 /* Function vect_recog_widen_mult_pattern
795 Try to find the following pattern:
797 type1 a_t;
798 type2 b_t;
799 TYPE a_T, b_T, prod_T;
801 S1 a_t = ;
802 S2 b_t = ;
803 S3 a_T = (TYPE) a_t;
804 S4 b_T = (TYPE) b_t;
805 S5 prod_T = a_T * b_T;
807 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
809 Also detect unsigned cases:
811 unsigned type1 a_t;
812 unsigned type2 b_t;
813 unsigned TYPE u_prod_T;
814 TYPE a_T, b_T, prod_T;
816 S1 a_t = ;
817 S2 b_t = ;
818 S3 a_T = (TYPE) a_t;
819 S4 b_T = (TYPE) b_t;
820 S5 prod_T = a_T * b_T;
821 S6 u_prod_T = (unsigned TYPE) prod_T;
823 and multiplication by constants:
825 type a_t;
826 TYPE a_T, prod_T;
828 S1 a_t = ;
829 S3 a_T = (TYPE) a_t;
830 S5 prod_T = a_T * CONST;
832 A special case of multiplication by constants is when 'TYPE' is 4 times
833 bigger than 'type', but CONST fits an intermediate type 2 times smaller
834 than 'TYPE'. In that case we create an additional pattern stmt for S3
835 to create a variable of the intermediate type, and perform widen-mult
836 on the intermediate type as well:
838 type a_t;
839 interm_type a_it;
840 TYPE a_T, prod_T, prod_T';
842 S1 a_t = ;
843 S3 a_T = (TYPE) a_t;
844 '--> a_it = (interm_type) a_t;
845 S5 prod_T = a_T * CONST;
846 '--> prod_T' = a_it w* CONST;
848 Input/Output:
850 * STMTS: Contains a stmt from which the pattern search begins. In the
851 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
852 is detected. In case of unsigned widen-mult, the original stmt (S5) is
853 replaced with S6 in STMTS. In case of multiplication by a constant
854 of an intermediate type (the last case above), STMTS also contains S3
855 (inserted before S5).
857 Output:
859 * TYPE_IN: The type of the input arguments to the pattern.
861 * TYPE_OUT: The type of the output of this pattern.
863 * Return value: A new stmt that will be used to replace the sequence of
864 stmts that constitute the pattern. In this case it will be:
865 WIDEN_MULT <a_t, b_t>
866 If the result of WIDEN_MULT needs to be converted to a larger type, the
867 returned stmt will be this type conversion stmt.
870 static gimple
871 vect_recog_widen_mult_pattern (vec<gimple> *stmts,
872 tree *type_in, tree *type_out)
874 gimple last_stmt = stmts->pop ();
875 gimple def_stmt0, def_stmt1;
876 tree oprnd0, oprnd1;
877 tree type, half_type0, half_type1;
878 gimple new_stmt = NULL, pattern_stmt = NULL;
879 tree vectype, vecitype;
880 tree var;
881 enum tree_code dummy_code;
882 int dummy_int;
883 vec<tree> dummy_vec;
884 bool op1_ok;
885 bool promotion;
887 if (!is_gimple_assign (last_stmt))
888 return NULL;
890 type = gimple_expr_type (last_stmt);
892 /* Starting from LAST_STMT, follow the defs of its uses in search
893 of the above pattern. */
895 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
896 return NULL;
898 oprnd0 = gimple_assign_rhs1 (last_stmt);
899 oprnd1 = gimple_assign_rhs2 (last_stmt);
900 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
901 || !types_compatible_p (TREE_TYPE (oprnd1), type))
902 return NULL;
904 /* Check argument 0. */
905 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
906 &promotion)
907 || !promotion)
908 return NULL;
909 /* Check argument 1. */
910 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
911 &def_stmt1, &promotion);
913 if (op1_ok && promotion)
915 oprnd0 = gimple_assign_rhs1 (def_stmt0);
916 oprnd1 = gimple_assign_rhs1 (def_stmt1);
918 else
920 if (TREE_CODE (oprnd1) == INTEGER_CST
921 && TREE_CODE (half_type0) == INTEGER_TYPE
922 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
923 &oprnd0, stmts, type,
924 &half_type0, def_stmt0))
926 half_type1 = half_type0;
927 oprnd1 = fold_convert (half_type1, oprnd1);
929 else
930 return NULL;
933 /* If the two arguments have different sizes, convert the one with
934 the smaller type into the larger type. */
935 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
937 tree* oprnd = NULL;
938 gimple def_stmt = NULL;
940 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
942 def_stmt = def_stmt0;
943 half_type0 = half_type1;
944 oprnd = &oprnd0;
946 else
948 def_stmt = def_stmt1;
949 half_type1 = half_type0;
950 oprnd = &oprnd1;
953 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
954 tree new_oprnd = make_ssa_name (half_type0);
955 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
956 *oprnd = new_oprnd;
959 /* Handle unsigned case. Look for
960 S6 u_prod_T = (unsigned TYPE) prod_T;
961 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
962 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
964 gimple use_stmt;
965 tree use_lhs;
966 tree use_type;
968 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
969 return NULL;
971 use_stmt = vect_single_imm_use (last_stmt);
972 if (!use_stmt || !is_gimple_assign (use_stmt)
973 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
974 return NULL;
976 use_lhs = gimple_assign_lhs (use_stmt);
977 use_type = TREE_TYPE (use_lhs);
978 if (!INTEGRAL_TYPE_P (use_type)
979 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
980 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
981 return NULL;
983 type = use_type;
984 last_stmt = use_stmt;
987 if (!types_compatible_p (half_type0, half_type1))
988 return NULL;
990 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
991 to get an intermediate result of type ITYPE. In this case we need
992 to build a statement to convert this intermediate result to type TYPE. */
993 tree itype = type;
994 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
995 itype = build_nonstandard_integer_type
996 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
997 TYPE_UNSIGNED (type));
999 /* Pattern detected. */
1000 if (dump_enabled_p ())
1001 dump_printf_loc (MSG_NOTE, vect_location,
1002 "vect_recog_widen_mult_pattern: detected:\n");
1004 /* Check target support */
1005 vectype = get_vectype_for_scalar_type (half_type0);
1006 vecitype = get_vectype_for_scalar_type (itype);
1007 if (!vectype
1008 || !vecitype
1009 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
1010 vecitype, vectype,
1011 &dummy_code, &dummy_code,
1012 &dummy_int, &dummy_vec))
1013 return NULL;
1015 *type_in = vectype;
1016 *type_out = get_vectype_for_scalar_type (type);
1018 /* Pattern supported. Create a stmt to be used to replace the pattern: */
1019 var = vect_recog_temp_ssa_var (itype, NULL);
1020 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
1022 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1023 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1024 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1025 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1027 /* If the original two operands have different sizes, we may need to convert
1028 the smaller one into the larget type. If this is the case, at this point
1029 the new stmt is already built. */
1030 if (new_stmt)
1032 append_pattern_def_seq (stmt_vinfo, new_stmt);
1033 stmt_vec_info new_stmt_info
1034 = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
1035 set_vinfo_for_stmt (new_stmt, new_stmt_info);
1036 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1039 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
1040 the result of the widen-mult operation into type TYPE. */
1041 if (itype != type)
1043 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1044 stmt_vec_info pattern_stmt_info
1045 = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
1046 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1047 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
1048 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
1049 NOP_EXPR,
1050 gimple_assign_lhs (pattern_stmt));
1053 if (dump_enabled_p ())
1054 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1056 stmts->safe_push (last_stmt);
1057 return pattern_stmt;
1061 /* Function vect_recog_pow_pattern
1063 Try to find the following pattern:
1065 x = POW (y, N);
1067 with POW being one of pow, powf, powi, powif and N being
1068 either 2 or 0.5.
1070 Input:
1072 * LAST_STMT: A stmt from which the pattern search begins.
1074 Output:
1076 * TYPE_IN: The type of the input arguments to the pattern.
1078 * TYPE_OUT: The type of the output of this pattern.
1080 * Return value: A new stmt that will be used to replace the sequence of
1081 stmts that constitute the pattern. In this case it will be:
1082 x = x * x
1084 x = sqrt (x)
1087 static gimple
1088 vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
1089 tree *type_out)
1091 gimple last_stmt = (*stmts)[0];
1092 tree fn, base, exp = NULL;
1093 gimple stmt;
1094 tree var;
/* The pattern starts from a builtin call whose result is actually used
   (a call with no LHS has nothing to replace).  */
1096 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1097 return NULL;
1099 fn = gimple_call_fndecl (last_stmt);
1100 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
1101 return NULL;
/* Accept only pow/powf/powi/powif, and only when the exponent is a
   compile-time constant (REAL_CST or INTEGER_CST).  */
1103 switch (DECL_FUNCTION_CODE (fn))
1105 case BUILT_IN_POWIF:
1106 case BUILT_IN_POWI:
1107 case BUILT_IN_POWF:
1108 case BUILT_IN_POW:
1109 base = gimple_call_arg (last_stmt, 0);
1110 exp = gimple_call_arg (last_stmt, 1);
1111 if (TREE_CODE (exp) != REAL_CST
1112 && TREE_CODE (exp) != INTEGER_CST)
1113 return NULL;
1114 break;
1116 default:
1117 return NULL;
1120 /* We now have a pow or powi builtin function call with a constant
1121 exponent. */
1123 *type_out = NULL_TREE;
1125 /* Catch squaring: pow (x, 2) / powi (x, 2) is rewritten as x * x.  */
1126 if ((tree_fits_shwi_p (exp)
1127 && tree_to_shwi (exp) == 2)
1128 || (TREE_CODE (exp) == REAL_CST
1129 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
1131 *type_in = TREE_TYPE (base);
1133 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1134 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1135 return stmt;
1138 /* Catch square root: pow (x, 0.5) is rewritten as sqrt (x), but only
   when the target can vectorize the sqrt call (vectorizable_function
   returns a non-NULL decl).  */
1139 if (TREE_CODE (exp) == REAL_CST
1140 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
1142 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
1143 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1144 if (*type_in)
1146 gcall *stmt = gimple_build_call (newfn, 1, base);
1147 if (vectorizable_function (stmt, *type_in, *type_in)
1148 != NULL_TREE)
1150 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1151 gimple_call_set_lhs (stmt, var);
1152 return stmt;
/* Any other exponent value: not a pattern we handle.  */
1157 return NULL;
1161 /* Function vect_recog_widen_sum_pattern
1163 Try to find the following pattern:
1165 type x_t;
1166 TYPE x_T, sum = init;
1167 loop:
1168 sum_0 = phi <init, sum_1>
1169 S1 x_t = *p;
1170 S2 x_T = (TYPE) x_t;
1171 S3 sum_1 = x_T + sum_0;
1173 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1174 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1175 a special case of a reduction computation.
1177 Input:
1179 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1180 when this function is called with S3, the pattern {S2,S3} will be detected.
1182 Output:
1184 * TYPE_IN: The type of the input arguments to the pattern.
1186 * TYPE_OUT: The type of the output of this pattern.
1188 * Return value: A new stmt that will be used to replace the sequence of
1189 stmts that constitute the pattern. In this case it will be:
1190 WIDEN_SUM <x_t, sum_0>
1192 Note: The widening-sum idiom is a widening reduction pattern that is
1193 vectorized without preserving all the intermediate results. It
1194 produces only N/2 (widened) results (by summing up pairs of
1195 intermediate results) rather than all N results. Therefore, we
1196 cannot allow this pattern when we want to get all the results and in
1197 the correct order (as is the case when this computation is in an
1198 inner-loop nested in an outer-loop that is being vectorized). */
1200 static gimple
1201 vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
1202 tree *type_out)
1204 gimple stmt, last_stmt = (*stmts)[0];
1205 tree oprnd0, oprnd1;
1206 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1207 tree type, half_type;
1208 gimple pattern_stmt;
1209 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1210 struct loop *loop;
1211 tree var;
1212 bool promotion;
/* Widen-sum is a reduction idiom, so it only applies during loop
   vectorization (no loop_vec_info means basic-block SLP).  */
1214 if (!loop_info)
1215 return NULL;
1217 loop = LOOP_VINFO_LOOP (loop_info);
1219 if (!is_gimple_assign (last_stmt))
1220 return NULL;
1222 type = gimple_expr_type (last_stmt);
1224 /* Look for the following pattern
1225 DX = (TYPE) X;
1226 sum_1 = DX + sum_0;
1227 In which DX is at least double the size of X, and sum_1 has been
1228 recognized as a reduction variable.
1231 /* Starting from LAST_STMT, follow the defs of its uses in search
1232 of the above pattern. */
1234 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1235 return NULL;
/* Only apply when the reduction detection phase has already classified
   LAST_STMT as a (summation) reduction.  */
1237 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
1238 return NULL;
1240 oprnd0 = gimple_assign_rhs1 (last_stmt);
1241 oprnd1 = gimple_assign_rhs2 (last_stmt);
1242 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1243 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1244 return NULL;
1246 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1247 we know that oprnd1 is the reduction variable (defined by a loop-header
1248 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1249 Left to check that oprnd0 is defined by a cast from type 'type' to type
1250 'TYPE'. */
1252 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1253 &promotion)
1254 || !promotion)
1255 return NULL;
/* Use the narrow pre-promotion operand in the WIDEN_SUM.  */
1257 oprnd0 = gimple_assign_rhs1 (stmt);
1258 *type_in = half_type;
1259 *type_out = type;
1261 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1262 var = vect_recog_temp_ssa_var (type, NULL);
1263 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1265 if (dump_enabled_p ())
1267 dump_printf_loc (MSG_NOTE, vect_location,
1268 "vect_recog_widen_sum_pattern: detected: ");
1269 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1270 dump_printf (MSG_NOTE, "\n");
1273 /* We don't allow changing the order of the computation in the inner-loop
1274 when doing outer-loop vectorization. */
1275 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
1277 return pattern_stmt;
1281 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1283 Input:
1284 STMT - a statement to check.
1285 DEF - we support operations with two operands, one of which is constant.
1286 The other operand can be defined by a demotion operation, or by a
1287 previous statement in a sequence of over-promoted operations. In the
1288 later case DEF is used to replace that operand. (It is defined by a
1289 pattern statement we created for the previous statement in the
1290 sequence).
1292 Input/output:
1293 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1294 NULL, it's the type of DEF.
1295 STMTS - additional pattern statements. If a pattern statement (type
1296 conversion) is created in this function, its original statement is
1297 added to STMTS.
1299 Output:
1300 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1301 operands to use in the new pattern statement for STMT (will be created
1302 in vect_recog_over_widening_pattern ()).
1303 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1304 statements for STMT: the first one is a type promotion and the second
1305 one is the operation itself. We return the type promotion statement
1306 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1307 the second pattern statement. */
1309 static bool
1310 vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
1311 tree *op0, tree *op1, gimple *new_def_stmt,
1312 vec<gimple> *stmts)
1314 enum tree_code code;
1315 tree const_oprnd, oprnd;
1316 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1317 gimple def_stmt, new_stmt;
1318 bool first = false;
1319 bool promotion;
1321 *op0 = NULL_TREE;
1322 *op1 = NULL_TREE;
1323 *new_def_stmt = NULL;
1325 if (!is_gimple_assign (stmt))
1326 return false;
/* Only shifts and bitwise ops are considered; their result on a narrow
   type can be related to the result on the wide type.  */
1328 code = gimple_assign_rhs_code (stmt);
1329 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1330 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1331 return false;
1333 oprnd = gimple_assign_rhs1 (stmt);
1334 const_oprnd = gimple_assign_rhs2 (stmt);
1335 type = gimple_expr_type (stmt);
1337 if (TREE_CODE (oprnd) != SSA_NAME
1338 || TREE_CODE (const_oprnd) != INTEGER_CST)
1339 return false;
1341 /* If oprnd has other uses besides that in stmt we cannot mark it
1342 as being part of a pattern only. */
1343 if (!has_single_use (oprnd))
1344 return false;
1346 /* If we are in the middle of a sequence, we use DEF from a previous
1347 statement. Otherwise, OPRND has to be a result of type promotion. */
1348 if (*new_type)
1350 half_type = *new_type;
1351 oprnd = def;
1353 else
1355 first = true;
1356 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1357 &promotion)
1358 || !promotion
1359 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1360 return false;
1363 /* Can we perform the operation on a smaller type? */
1364 switch (code)
1366 case BIT_IOR_EXPR:
1367 case BIT_XOR_EXPR:
1368 case BIT_AND_EXPR:
1369 if (!int_fits_type_p (const_oprnd, half_type))
1371 /* HALF_TYPE is not enough. Try a bigger type if possible.
   TYPE must be at least 4x HALF_TYPE so that INTERM_TYPE (2x
   HALF_TYPE) is still a genuine demotion from TYPE.  */
1372 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1373 return false;
1375 interm_type = build_nonstandard_integer_type (
1376 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1377 if (!int_fits_type_p (const_oprnd, interm_type))
1378 return false;
1381 break;
1383 case LSHIFT_EXPR:
1384 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1385 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1386 return false;
1388 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1389 (e.g., if the original value was char, the shift amount is at most 8
1390 if we want to use short). */
1391 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1392 return false;
1394 interm_type = build_nonstandard_integer_type (
1395 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1397 if (!vect_supportable_shift (code, interm_type))
1398 return false;
1400 break;
1402 case RSHIFT_EXPR:
/* Right shift can be done directly on HALF_TYPE if the target
   supports it; otherwise fall back to the intermediate type.  */
1403 if (vect_supportable_shift (code, half_type))
1404 break;
1406 /* Try intermediate type - HALF_TYPE is not supported. */
1407 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1408 return false;
1410 interm_type = build_nonstandard_integer_type (
1411 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1413 if (!vect_supportable_shift (code, interm_type))
1414 return false;
1416 break;
1418 default:
1419 gcc_unreachable ();
1422 /* There are four possible cases:
1423 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1424 the first statement in the sequence)
1425 a. The original, HALF_TYPE, is not enough - we replace the promotion
1426 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1427 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1428 promotion.
1429 2. OPRND is defined by a pattern statement we created.
1430 a. Its type is not sufficient for the operation, we create a new stmt:
1431 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1432 this statement in NEW_DEF_STMT, and it is later put in
1433 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1434 b. OPRND is good to use in the new statement. */
1435 if (first)
1437 if (interm_type)
1439 /* Replace the original type conversion HALF_TYPE->TYPE with
1440 HALF_TYPE->INTERM_TYPE. */
1441 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1443 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1444 /* Check if the already created pattern stmt is what we need. */
1445 if (!is_gimple_assign (new_stmt)
1446 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1447 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1448 return false;
1450 stmts->safe_push (def_stmt);
1451 oprnd = gimple_assign_lhs (new_stmt);
1453 else
1455 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1456 oprnd = gimple_assign_rhs1 (def_stmt);
1457 new_oprnd = make_ssa_name (interm_type);
1458 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1459 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1460 stmts->safe_push (def_stmt);
1461 oprnd = new_oprnd;
1464 else
1466 /* Retrieve the operand before the type promotion. */
1467 oprnd = gimple_assign_rhs1 (def_stmt);
1470 else
1472 if (interm_type)
1474 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1475 new_oprnd = make_ssa_name (interm_type);
1476 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1477 oprnd = new_oprnd;
1478 *new_def_stmt = new_stmt;
1481 /* Otherwise, OPRND is already set. */
1484 if (interm_type)
1485 *new_type = interm_type;
1486 else
1487 *new_type = half_type;
1489 *op0 = oprnd;
1490 *op1 = fold_convert (*new_type, const_oprnd);
1492 return true;
1496 /* Try to find a statement or a sequence of statements that can be performed
1497 on a smaller type:
1499 type x_t;
1500 TYPE x_T, res0_T, res1_T;
1501 loop:
1502 S1 x_t = *p;
1503 S2 x_T = (TYPE) x_t;
1504 S3 res0_T = op (x_T, C0);
1505 S4 res1_T = op (res0_T, C1);
1506 S5 ... = () res1_T; - type demotion
1508 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1509 constants.
1510 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1511 be 'type' or some intermediate type. For now, we expect S5 to be a type
1512 demotion operation. We also check that S3 and S4 have only one use. */
1514 static gimple
1515 vect_recog_over_widening_pattern (vec<gimple> *stmts,
1516 tree *type_in, tree *type_out)
1518 gimple stmt = stmts->pop ();
1519 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1520 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1521 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1522 bool first;
1523 tree type = NULL;
1525 first = true;
/* Walk forward through the single-use chain of over-widened statements,
   building a narrow-type pattern stmt for each one that fits.  */
1526 while (1)
1528 if (!vinfo_for_stmt (stmt)
1529 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1530 return NULL;
1532 new_def_stmt = NULL;
1533 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1534 &op0, &op1, &new_def_stmt,
1535 stmts))
1537 if (first)
1538 return NULL;
1539 else
1540 break;
1543 /* STMT can be performed on a smaller type. Check its uses. */
1544 use_stmt = vect_single_imm_use (stmt);
1545 if (!use_stmt || !is_gimple_assign (use_stmt))
1546 return NULL;
1548 /* Create pattern statement for STMT. */
1549 vectype = get_vectype_for_scalar_type (new_type);
1550 if (!vectype)
1551 return NULL;
1553 /* We want to collect all the statements for which we create pattern
1554 statements, except for the case when the last statement in the
1555 sequence doesn't have a corresponding pattern statement. In such
1556 case we associate the last pattern statement with the last statement
1557 in the sequence. Therefore, we only add the original statement to
1558 the list if we know that it is not the last. */
1559 if (prev_stmt)
1560 stmts->safe_push (prev_stmt);
1562 var = vect_recog_temp_ssa_var (new_type, NULL);
1563 pattern_stmt
1564 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1565 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1566 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1568 if (dump_enabled_p ())
1570 dump_printf_loc (MSG_NOTE, vect_location,
1571 "created pattern stmt: ");
1572 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1573 dump_printf (MSG_NOTE, "\n");
/* Advance to the (single) use and iterate.  */
1576 type = gimple_expr_type (stmt);
1577 prev_stmt = stmt;
1578 stmt = use_stmt;
1580 first = false;
1583 /* We got a sequence. We expect it to end with a type demotion operation.
1584 Otherwise, we quit (for now). There are three possible cases: the
1585 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1586 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1587 NEW_TYPE differs (we create a new conversion statement). */
1588 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1590 use_lhs = gimple_assign_lhs (use_stmt);
1591 use_type = TREE_TYPE (use_lhs);
1592 /* Support only type demotion or signedness change. */
1593 if (!INTEGRAL_TYPE_P (use_type)
1594 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1595 return NULL;
1597 /* Check that NEW_TYPE is not bigger than the conversion result. */
1598 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1599 return NULL;
1601 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1602 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1604 /* Create NEW_TYPE->USE_TYPE conversion. */
1605 new_oprnd = make_ssa_name (use_type);
1606 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1607 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1609 *type_in = get_vectype_for_scalar_type (new_type);
1610 *type_out = get_vectype_for_scalar_type (use_type);
1612 /* We created a pattern statement for the last statement in the
1613 sequence, so we don't need to associate it with the pattern
1614 statement created for PREV_STMT. Therefore, we add PREV_STMT
1615 to the list in order to mark it later in vect_pattern_recog_1. */
1616 if (prev_stmt)
1617 stmts->safe_push (prev_stmt);
1619 else
1621 if (prev_stmt)
1622 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1623 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1625 *type_in = vectype;
1626 *type_out = NULL_TREE;
1629 stmts->safe_push (use_stmt);
1631 else
1632 /* TODO: support general case, create a conversion to the correct type. */
1633 return NULL;
1635 /* Pattern detected. */
1636 if (dump_enabled_p ())
1638 dump_printf_loc (MSG_NOTE, vect_location,
1639 "vect_recog_over_widening_pattern: detected: ");
1640 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1641 dump_printf (MSG_NOTE, "\n");
1644 return pattern_stmt;
1647 /* Detect widening shift pattern:
1649 type a_t;
1650 TYPE a_T, res_T;
1652 S1 a_t = ;
1653 S2 a_T = (TYPE) a_t;
1654 S3 res_T = a_T << CONST;
1656 where type 'TYPE' is at least double the size of type 'type'.
1658 Also detect cases where the shift result is immediately converted
1659 to another type 'result_type' that is no larger in size than 'TYPE'.
1660 In those cases we perform a widen-shift that directly results in
1661 'result_type', to avoid a possible over-widening situation:
1663 type a_t;
1664 TYPE a_T, res_T;
1665 result_type res_result;
1667 S1 a_t = ;
1668 S2 a_T = (TYPE) a_t;
1669 S3 res_T = a_T << CONST;
1670 S4 res_result = (result_type) res_T;
1671 '--> res_result' = a_t w<< CONST;
1673 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1674 create an additional pattern stmt for S2 to create a variable of an
1675 intermediate type, and perform widen-shift on the intermediate type:
1677 type a_t;
1678 interm_type a_it;
1679 TYPE a_T, res_T, res_T';
1681 S1 a_t = ;
1682 S2 a_T = (TYPE) a_t;
1683 '--> a_it = (interm_type) a_t;
1684 S3 res_T = a_T << CONST;
1685 '--> res_T' = a_it <<* CONST;
1687 Input/Output:
1689 * STMTS: Contains a stmt from which the pattern search begins.
1690 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1691 in STMTS. When an intermediate type is used and a pattern statement is
1692 created for S2, we also put S2 here (before S3).
1694 Output:
1696 * TYPE_IN: The type of the input arguments to the pattern.
1698 * TYPE_OUT: The type of the output of this pattern.
1700 * Return value: A new stmt that will be used to replace the sequence of
1701 stmts that constitute the pattern. In this case it will be:
1702 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1704 static gimple
1705 vect_recog_widen_shift_pattern (vec<gimple> *stmts,
1706 tree *type_in, tree *type_out)
1708 gimple last_stmt = stmts->pop ();
1709 gimple def_stmt0;
1710 tree oprnd0, oprnd1;
1711 tree type, half_type0;
1712 gimple pattern_stmt;
1713 tree vectype, vectype_out = NULL_TREE;
1714 tree var;
1715 enum tree_code dummy_code;
1716 int dummy_int;
1717 vec<tree> dummy_vec;
1718 gimple use_stmt;
1719 bool promotion;
1721 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1722 return NULL;
1724 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1725 return NULL;
/* The pattern root must be a left shift by a constant.  */
1727 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1728 return NULL;
1730 oprnd0 = gimple_assign_rhs1 (last_stmt);
1731 oprnd1 = gimple_assign_rhs2 (last_stmt);
1732 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1733 return NULL;
1735 /* Check operand 0: it has to be defined by a type promotion. */
1736 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1737 &promotion)
1738 || !promotion)
1739 return NULL;
1741 /* Check operand 1: has to be positive. We check that it fits the type
1742 in vect_handle_widen_op_by_const (). */
1743 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1744 return NULL;
1746 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1747 type = gimple_expr_type (last_stmt);
1749 /* Check for subsequent conversion to another type (S4 in the function
   comment); if the converted type is no wider than TYPE, widen-shift
   directly into that type to avoid over-widening.  */
1750 use_stmt = vect_single_imm_use (last_stmt);
1751 if (use_stmt && is_gimple_assign (use_stmt)
1752 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1753 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1755 tree use_lhs = gimple_assign_lhs (use_stmt);
1756 tree use_type = TREE_TYPE (use_lhs);
1758 if (INTEGRAL_TYPE_P (use_type)
1759 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1761 last_stmt = use_stmt;
1762 type = use_type;
1766 /* Check if this is a widening operation. */
1767 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1768 &oprnd0, stmts,
1769 type, &half_type0, def_stmt0))
1770 return NULL;
1772 /* Pattern detected. */
1773 if (dump_enabled_p ())
1774 dump_printf_loc (MSG_NOTE, vect_location,
1775 "vect_recog_widen_shift_pattern: detected:\n");
1777 /* Check target support. */
1778 vectype = get_vectype_for_scalar_type (half_type0);
1779 vectype_out = get_vectype_for_scalar_type (type);
1781 if (!vectype
1782 || !vectype_out
1783 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1784 vectype_out, vectype,
1785 &dummy_code, &dummy_code,
1786 &dummy_int, &dummy_vec))
1787 return NULL;
1789 *type_in = vectype;
1790 *type_out = vectype_out;
1792 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1793 var = vect_recog_temp_ssa_var (type, NULL);
1794 pattern_stmt =
1795 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1797 if (dump_enabled_p ())
1798 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1800 stmts->safe_push (last_stmt);
1801 return pattern_stmt;
1804 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
1806 type a_t, b_t, c_t;
1808 S0 a_t = b_t r<< c_t;
1810 Input/Output:
1812 * STMTS: Contains a stmt from which the pattern search begins,
1813 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1814 with a sequence:
1816 S1 d_t = -c_t;
1817 S2 e_t = d_t & (B - 1);
1818 S3 f_t = b_t << c_t;
1819 S4 g_t = b_t >> e_t;
1820 S0 a_t = f_t | g_t;
1822 where B is element bitsize of type.
1824 Output:
1826 * TYPE_IN: The type of the input arguments to the pattern.
1828 * TYPE_OUT: The type of the output of this pattern.
1830 * Return value: A new stmt that will be used to replace the rotate
1831 S0 stmt. */
1833 static gimple
1834 vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1836 gimple last_stmt = stmts->pop ();
1837 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1838 gimple pattern_stmt, def_stmt;
1839 enum tree_code rhs_code;
1840 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1841 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1842 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1843 enum vect_def_type dt;
1844 optab optab1, optab2;
1845 edge ext_def = NULL;
1847 if (!is_gimple_assign (last_stmt))
1848 return NULL;
1850 rhs_code = gimple_assign_rhs_code (last_stmt);
1851 switch (rhs_code)
1853 case LROTATE_EXPR:
1854 case RROTATE_EXPR:
1855 break;
1856 default:
1857 return NULL;
1860 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1861 return NULL;
1863 lhs = gimple_assign_lhs (last_stmt);
1864 oprnd0 = gimple_assign_rhs1 (last_stmt);
1865 type = TREE_TYPE (oprnd0);
1866 oprnd1 = gimple_assign_rhs2 (last_stmt);
/* Only unsigned integral rotates of matching precision are handled;
   the shift-based expansion below relies on logical (unsigned) right
   shift semantics.  */
1867 if (TREE_CODE (oprnd0) != SSA_NAME
1868 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1869 || !INTEGRAL_TYPE_P (type)
1870 || !TYPE_UNSIGNED (type))
1871 return NULL;
1873 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1874 &def, &dt))
1875 return NULL;
1877 if (dt != vect_internal_def
1878 && dt != vect_constant_def
1879 && dt != vect_external_def)
1880 return NULL;
1882 vectype = get_vectype_for_scalar_type (type);
1883 if (vectype == NULL_TREE)
1884 return NULL;
1886 /* If vector/vector or vector/scalar rotate is supported by the target,
1887 don't do anything here. */
1888 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1889 if (optab1
1890 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1891 return NULL;
1893 if (bb_vinfo != NULL || dt != vect_internal_def)
1895 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1896 if (optab2
1897 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1898 return NULL;
1901 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1902 don't do anything here either. */
1903 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1904 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1905 if (!optab1
1906 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1907 || !optab2
1908 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1910 if (bb_vinfo == NULL && dt == vect_internal_def)
1911 return NULL;
1912 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1913 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1914 if (!optab1
1915 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1916 || !optab2
1917 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1918 return NULL;
1921 *type_in = vectype;
1922 *type_out = vectype;
1923 if (*type_in == NULL_TREE)
1924 return NULL;
/* For a loop-external shift amount, try to emit the helper statements
   on the loop preheader edge so they execute once, provided the SSA
   definition dominates the preheader.  */
1926 if (dt == vect_external_def
1927 && TREE_CODE (oprnd1) == SSA_NAME
1928 && loop_vinfo)
1930 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1931 ext_def = loop_preheader_edge (loop);
1932 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1934 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1935 if (bb == NULL
1936 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1937 ext_def = NULL;
/* DEF is the shift count in a type with TYPE's mode: the constant,
   OPRND1 itself, the pre-cast value, or a fresh NOP_EXPR copy.  */
1941 def = NULL_TREE;
1942 if (TREE_CODE (oprnd1) == INTEGER_CST
1943 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1944 def = oprnd1;
1945 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1947 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1948 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1949 && TYPE_PRECISION (TREE_TYPE (rhs1))
1950 == TYPE_PRECISION (type))
1951 def = rhs1;
1954 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1955 if (def == NULL_TREE)
1957 def = vect_recog_temp_ssa_var (type, NULL);
1958 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1959 if (ext_def)
1961 basic_block new_bb
1962 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1963 gcc_assert (!new_bb);
1965 else
1966 append_pattern_def_seq (stmt_vinfo, def_stmt);
1968 stype = TREE_TYPE (def);
1970 if (TREE_CODE (def) == INTEGER_CST)
/* Constant rotate count: it must be a valid non-zero amount below the
   type precision, and DEF2 is simply precision - count.  */
1972 if (!tree_fits_uhwi_p (def)
1973 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1974 || integer_zerop (def))
1975 return NULL;
1976 def2 = build_int_cst (stype,
1977 GET_MODE_PRECISION (TYPE_MODE (type))
1978 - tree_to_uhwi (def));
1980 else
/* Variable rotate count: build DEF2 = (-DEF) & (precision - 1), the
   complementary shift amount (S1 and S2 in the function comment).  */
1982 tree vecstype = get_vectype_for_scalar_type (stype);
1983 stmt_vec_info def_stmt_vinfo;
1985 if (vecstype == NULL_TREE)
1986 return NULL;
1987 def2 = vect_recog_temp_ssa_var (stype, NULL);
1988 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1989 if (ext_def)
1991 basic_block new_bb
1992 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1993 gcc_assert (!new_bb);
1995 else
1997 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1998 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1999 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2000 append_pattern_def_seq (stmt_vinfo, def_stmt);
2003 def2 = vect_recog_temp_ssa_var (stype, NULL);
2004 tree mask
2005 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
2006 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2007 gimple_assign_lhs (def_stmt), mask);
2008 if (ext_def)
2010 basic_block new_bb
2011 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2012 gcc_assert (!new_bb);
2014 else
2016 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2017 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2018 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2019 append_pattern_def_seq (stmt_vinfo, def_stmt);
/* Emit the two shifts (S3 and S4 in the function comment); their
   directions swap for a right rotate.  */
2023 var1 = vect_recog_temp_ssa_var (type, NULL);
2024 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2025 ? LSHIFT_EXPR : RSHIFT_EXPR,
2026 oprnd0, def);
2027 append_pattern_def_seq (stmt_vinfo, def_stmt);
2029 var2 = vect_recog_temp_ssa_var (type, NULL);
2030 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2031 ? RSHIFT_EXPR : LSHIFT_EXPR,
2032 oprnd0, def2);
2033 append_pattern_def_seq (stmt_vinfo, def_stmt);
2035 /* Pattern detected. */
2036 if (dump_enabled_p ())
2037 dump_printf_loc (MSG_NOTE, vect_location,
2038 "vect_recog_rotate_pattern: detected:\n");
2040 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2041 var = vect_recog_temp_ssa_var (type, NULL);
2042 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2044 if (dump_enabled_p ())
2045 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2047 stmts->safe_push (last_stmt);
2048 return pattern_stmt;
2051 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2052 vectorized:
2054 type a_t;
2055 TYPE b_T, res_T;
2057 S1 a_t = ;
2058 S2 b_T = ;
2059 S3 res_T = b_T op a_t;
2061 where type 'TYPE' is a type with different size than 'type',
2062 and op is <<, >> or rotate.
2064 Also detect cases:
2066 type a_t;
2067 TYPE b_T, c_T, res_T;
2069 S0 c_T = ;
2070 S1 a_t = (type) c_T;
2071 S2 b_T = ;
2072 S3 res_T = b_T op a_t;
2074 Input/Output:
2076 * STMTS: Contains a stmt from which the pattern search begins,
2077 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2078 with a shift/rotate which has same type on both operands, in the
2079 second case just b_T op c_T, in the first case with added cast
2080 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2082 Output:
2084 * TYPE_IN: The type of the input arguments to the pattern.
2086 * TYPE_OUT: The type of the output of this pattern.
2088 * Return value: A new stmt that will be used to replace the shift/rotate
2089 S3 stmt. */
2091 static gimple
2092 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
2093 tree *type_in, tree *type_out)
2095 gimple last_stmt = stmts->pop ();
2096 tree oprnd0, oprnd1, lhs, var;
2097 gimple pattern_stmt, def_stmt;
2098 enum tree_code rhs_code;
2099 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2100 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2101 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2102 enum vect_def_type dt;
2103 tree def;
2105 if (!is_gimple_assign (last_stmt))
2106 return NULL;
2108 rhs_code = gimple_assign_rhs_code (last_stmt);
2109 switch (rhs_code)
2111 case LSHIFT_EXPR:
2112 case RSHIFT_EXPR:
2113 case LROTATE_EXPR:
2114 case RROTATE_EXPR:
2115 break;
2116 default:
2117 return NULL;
2120 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2121 return NULL;
2123 lhs = gimple_assign_lhs (last_stmt);
2124 oprnd0 = gimple_assign_rhs1 (last_stmt);
2125 oprnd1 = gimple_assign_rhs2 (last_stmt);
2126 if (TREE_CODE (oprnd0) != SSA_NAME
2127 || TREE_CODE (oprnd1) != SSA_NAME
2128 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2129 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2130 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2131 || TYPE_PRECISION (TREE_TYPE (lhs))
2132 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2133 return NULL;
2135 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
2136 &def, &dt))
2137 return NULL;
2139 if (dt != vect_internal_def)
2140 return NULL;
2142 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2143 *type_out = *type_in;
2144 if (*type_in == NULL_TREE)
2145 return NULL;
2147 def = NULL_TREE;
2148 if (gimple_assign_cast_p (def_stmt))
2150 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2151 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2152 && TYPE_PRECISION (TREE_TYPE (rhs1))
2153 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2154 def = rhs1;
2157 if (def == NULL_TREE)
2159 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2160 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2161 new_pattern_def_seq (stmt_vinfo, def_stmt);
2164 /* Pattern detected. */
2165 if (dump_enabled_p ())
2166 dump_printf_loc (MSG_NOTE, vect_location,
2167 "vect_recog_vector_vector_shift_pattern: detected:\n");
2169 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2170 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2171 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2173 if (dump_enabled_p ())
2174 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2176 stmts->safe_push (last_stmt);
2177 return pattern_stmt;
2180 /* Detect a signed division by a constant that wouldn't be
2181 otherwise vectorized:
2183 type a_t, b_t;
2185 S1 a_t = b_t / N;
2187 where type 'type' is an integral type and N is a constant.
2189 Similarly handle modulo by a constant:
2191 S4 a_t = b_t % N;
2193 Input/Output:
2195 * STMTS: Contains a stmt from which the pattern search begins,
2196 i.e. the division stmt. S1 is replaced by if N is a power
2197 of two constant and type is signed:
2198 S3 y_t = b_t < 0 ? N - 1 : 0;
2199 S2 x_t = b_t + y_t;
2200 S1' a_t = x_t >> log2 (N);
2202 S4 is replaced if N is a power of two constant and
2203 type is signed by (where *_T temporaries have unsigned type):
2204 S9 y_T = b_t < 0 ? -1U : 0U;
2205 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2206 S7 z_t = (type) z_T;
2207 S6 w_t = b_t + z_t;
2208 S5 x_t = w_t & (N - 1);
2209 S4' a_t = x_t - z_t;
2211 Output:
2213 * TYPE_IN: The type of the input arguments to the pattern.
2215 * TYPE_OUT: The type of the output of this pattern.
2217 * Return value: A new stmt that will be used to replace the division
2218 S1 or modulo S4 stmt. */
2220 static gimple
2221 vect_recog_divmod_pattern (vec<gimple> *stmts,
2222 tree *type_in, tree *type_out)
/* Pops the candidate division/modulo stmt from STMTS; on success sets
   *TYPE_IN/*TYPE_OUT to the vector type, pushes the original stmt back
   onto STMTS and returns the replacement stmt, otherwise NULL.  */
2224 gimple last_stmt = stmts->pop ();
2225 tree oprnd0, oprnd1, vectype, itype, cond;
2226 gimple pattern_stmt, def_stmt;
2227 enum tree_code rhs_code;
2228 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2229 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2230 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2231 optab optab;
2232 tree q;
2233 int dummy_int, prec;
2234 stmt_vec_info def_stmt_vinfo;
/* Only assignments with a TRUNC_DIV_EXPR or TRUNC_MOD_EXPR rhs that
   are not already part of another pattern qualify.  */
2236 if (!is_gimple_assign (last_stmt))
2237 return NULL;
2239 rhs_code = gimple_assign_rhs_code (last_stmt);
2240 switch (rhs_code)
2242 case TRUNC_DIV_EXPR:
2243 case TRUNC_MOD_EXPR:
2244 break;
2245 default:
2246 return NULL;
2249 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2250 return NULL;
/* Require SSA_NAME / INTEGER_CST operands and an integral type whose
   precision exactly fills its mode.  */
2252 oprnd0 = gimple_assign_rhs1 (last_stmt);
2253 oprnd1 = gimple_assign_rhs2 (last_stmt);
2254 itype = TREE_TYPE (oprnd0);
2255 if (TREE_CODE (oprnd0) != SSA_NAME
2256 || TREE_CODE (oprnd1) != INTEGER_CST
2257 || TREE_CODE (itype) != INTEGER_TYPE
2258 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2259 return NULL;
2261 vectype = get_vectype_for_scalar_type (itype);
2262 if (vectype == NULL_TREE)
2263 return NULL;
2265 /* If the target can handle vectorized division or modulo natively,
2266 don't attempt to optimize this. */
2267 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2268 if (optab != unknown_optab)
2270 machine_mode vec_mode = TYPE_MODE (vectype);
2271 int icode = (int) optab_handler (optab, vec_mode);
2272 if (icode != CODE_FOR_nothing)
2273 return NULL;
2276 prec = TYPE_PRECISION (itype);
/* Case 1: power-of-two divisor, expanded with shifts (S1'-S3 resp.
   S4'-S9 in the function comment).  Only signed types with a positive
   divisor are handled here.  */
2277 if (integer_pow2p (oprnd1))
2279 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2280 return NULL;
2282 /* Pattern detected. */
2283 if (dump_enabled_p ())
2284 dump_printf_loc (MSG_NOTE, vect_location,
2285 "vect_recog_divmod_pattern: detected:\n");
/* COND is "oprnd0 < 0", used below to bias negative dividends.  */
2287 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2288 build_int_cst (itype, 0));
2289 if (rhs_code == TRUNC_DIV_EXPR)
2291 tree var = vect_recog_temp_ssa_var (itype, NULL);
2292 tree shift;
2293 def_stmt
2294 = gimple_build_assign (var, COND_EXPR, cond,
2295 fold_build2 (MINUS_EXPR, itype, oprnd1,
2296 build_int_cst (itype, 1)),
2297 build_int_cst (itype, 0))
... — no, removed; see below.
2640 /* Function vect_recog_mixed_size_cond_pattern
2642 Try to find the following pattern:
2644 type x_t, y_t;
2645 TYPE a_T, b_T, c_T;
2646 loop:
2647 S1 a_T = x_t CMP y_t ? b_T : c_T;
2649 where type 'TYPE' is an integral type which has different size
2650 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2651 than 'type', the constants need to fit into an integer type
2652 with the same width as 'type') or results of conversion from 'type'.
2654 Input:
2656 * LAST_STMT: A stmt from which the pattern search begins.
2658 Output:
2660 * TYPE_IN: The type of the input arguments to the pattern.
2662 * TYPE_OUT: The type of the output of this pattern.
2664 * Return value: A new stmt that will be used to replace the pattern.
2665 Additionally a def_stmt is added.
2667 a_it = x_t CMP y_t ? b_it : c_it;
2668 a_T = (TYPE) a_it; */
2670 static gimple
2671 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2672 tree *type_out)
2674 gimple last_stmt = (*stmts)[0];
2675 tree cond_expr, then_clause, else_clause;
2676 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2677 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2678 machine_mode cmpmode;
2679 gimple pattern_stmt, def_stmt;
2680 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2681 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2682 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2683 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2684 bool promotion;
2685 tree comp_scalar_type;
2687 if (!is_gimple_assign (last_stmt)
2688 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2689 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2690 return NULL;
2692 cond_expr = gimple_assign_rhs1 (last_stmt);
2693 then_clause = gimple_assign_rhs2 (last_stmt);
2694 else_clause = gimple_assign_rhs3 (last_stmt);
2696 if (!COMPARISON_CLASS_P (cond_expr))
2697 return NULL;
2699 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2700 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2701 if (comp_vectype == NULL_TREE)
2702 return NULL;
2704 type = gimple_expr_type (last_stmt);
2705 if (types_compatible_p (type, comp_scalar_type)
2706 || ((TREE_CODE (then_clause) != INTEGER_CST
2707 || TREE_CODE (else_clause) != INTEGER_CST)
2708 && !INTEGRAL_TYPE_P (comp_scalar_type))
2709 || !INTEGRAL_TYPE_P (type))
2710 return NULL;
2712 if ((TREE_CODE (then_clause) != INTEGER_CST
2713 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2714 &def_stmt0, &promotion))
2715 || (TREE_CODE (else_clause) != INTEGER_CST
2716 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2717 &def_stmt1, &promotion)))
2718 return NULL;
2720 if (orig_type0 && orig_type1
2721 && !types_compatible_p (orig_type0, orig_type1))
2722 return NULL;
2724 if (orig_type0)
2726 if (!types_compatible_p (orig_type0, comp_scalar_type))
2727 return NULL;
2728 then_clause = gimple_assign_rhs1 (def_stmt0);
2729 itype = orig_type0;
2732 if (orig_type1)
2734 if (!types_compatible_p (orig_type1, comp_scalar_type))
2735 return NULL;
2736 else_clause = gimple_assign_rhs1 (def_stmt1);
2737 itype = orig_type1;
2740 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2742 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2743 return NULL;
2745 vectype = get_vectype_for_scalar_type (type);
2746 if (vectype == NULL_TREE)
2747 return NULL;
2749 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2750 return NULL;
2752 if (itype == NULL_TREE)
2753 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2754 TYPE_UNSIGNED (type));
2756 if (itype == NULL_TREE
2757 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2758 return NULL;
2760 vecitype = get_vectype_for_scalar_type (itype);
2761 if (vecitype == NULL_TREE)
2762 return NULL;
2764 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2765 return NULL;
2767 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2769 if ((TREE_CODE (then_clause) == INTEGER_CST
2770 && !int_fits_type_p (then_clause, itype))
2771 || (TREE_CODE (else_clause) == INTEGER_CST
2772 && !int_fits_type_p (else_clause, itype)))
2773 return NULL;
2776 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2777 COND_EXPR, unshare_expr (cond_expr),
2778 fold_convert (itype, then_clause),
2779 fold_convert (itype, else_clause));
2780 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2781 NOP_EXPR, gimple_assign_lhs (def_stmt));
2783 new_pattern_def_seq (stmt_vinfo, def_stmt);
2784 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2785 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2786 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2787 *type_in = vecitype;
2788 *type_out = vectype;
2790 if (dump_enabled_p ())
2791 dump_printf_loc (MSG_NOTE, vect_location,
2792 "vect_recog_mixed_size_cond_pattern: detected:\n");
2794 return pattern_stmt;
2798 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2799 true if bool VAR can be optimized that way. */
2801 static bool
2802 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2804 gimple def_stmt;
2805 enum vect_def_type dt;
2806 tree def, rhs1;
2807 enum tree_code rhs_code;
2809 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2810 &dt))
2811 return false;
2813 if (dt != vect_internal_def)
2814 return false;
2816 if (!is_gimple_assign (def_stmt))
2817 return false;
2819 if (!has_single_use (def))
2820 return false;
2822 rhs1 = gimple_assign_rhs1 (def_stmt);
2823 rhs_code = gimple_assign_rhs_code (def_stmt);
2824 switch (rhs_code)
2826 case SSA_NAME:
2827 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2829 CASE_CONVERT:
2830 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2831 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2832 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2833 return false;
2834 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2836 case BIT_NOT_EXPR:
2837 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2839 case BIT_AND_EXPR:
2840 case BIT_IOR_EXPR:
2841 case BIT_XOR_EXPR:
2842 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2843 return false;
2844 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2845 bb_vinfo);
2847 default:
2848 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2850 tree vecitype, comp_vectype;
2852 /* If the comparison can throw, then is_gimple_condexpr will be
2853 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2854 if (stmt_could_throw_p (def_stmt))
2855 return false;
2857 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2858 if (comp_vectype == NULL_TREE)
2859 return false;
2861 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2863 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2864 tree itype
2865 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2866 vecitype = get_vectype_for_scalar_type (itype);
2867 if (vecitype == NULL_TREE)
2868 return false;
2870 else
2871 vecitype = comp_vectype;
2872 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2874 return false;
2879 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2880 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2881 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2883 static tree
2884 adjust_bool_pattern_cast (tree type, tree var)
2886 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2887 gimple cast_stmt, pattern_stmt;
2889 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2890 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2891 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2892 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2893 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2894 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2895 return gimple_assign_lhs (cast_stmt);
2899 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2900 recursively. VAR is an SSA_NAME that should be transformed from bool
2901 to a wider integer type, OUT_TYPE is the desired final integer type of
2902 the whole pattern, TRUEVAL should be NULL unless optimizing
2903 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2904 in the then_clause, STMTS is where statements with added pattern stmts
2905 should be pushed to. */
2907 static tree
2908 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2909 vec<gimple> *stmts)
2911 gimple stmt = SSA_NAME_DEF_STMT (var);
2912 enum tree_code rhs_code, def_rhs_code;
2913 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2914 location_t loc;
2915 gimple pattern_stmt, def_stmt;
2917 rhs1 = gimple_assign_rhs1 (stmt);
2918 rhs2 = gimple_assign_rhs2 (stmt);
2919 rhs_code = gimple_assign_rhs_code (stmt);
2920 loc = gimple_location (stmt);
2921 switch (rhs_code)
2923 case SSA_NAME:
2924 CASE_CONVERT:
2925 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2926 itype = TREE_TYPE (irhs1);
2927 pattern_stmt
2928 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2929 SSA_NAME, irhs1);
2930 break;
2932 case BIT_NOT_EXPR:
2933 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2934 itype = TREE_TYPE (irhs1);
2935 pattern_stmt
2936 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2937 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
2938 break;
2940 case BIT_AND_EXPR:
2941 /* Try to optimize x = y & (a < b ? 1 : 0); into
2942 x = (a < b ? y : 0);
2944 E.g. for:
2945 bool a_b, b_b, c_b;
2946 TYPE d_T;
2948 S1 a_b = x1 CMP1 y1;
2949 S2 b_b = x2 CMP2 y2;
2950 S3 c_b = a_b & b_b;
2951 S4 d_T = (TYPE) c_b;
2953 we would normally emit:
2955 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2956 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2957 S3' c_T = a_T & b_T;
2958 S4' d_T = c_T;
2960 but we can save one stmt by using the
2961 result of one of the COND_EXPRs in the other COND_EXPR and leave
2962 BIT_AND_EXPR stmt out:
2964 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2965 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2966 S4' f_T = c_T;
2968 At least when VEC_COND_EXPR is implemented using masks
2969 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2970 computes the comparison masks and ands it, in one case with
2971 all ones vector, in the other case with a vector register.
2972 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2973 often more expensive. */
2974 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2975 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2976 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2978 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2979 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2980 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2981 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2983 gimple tstmt;
2984 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2985 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2986 tstmt = stmts->pop ();
2987 gcc_assert (tstmt == def_stmt);
2988 stmts->quick_push (stmt);
2989 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2990 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2991 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2992 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2993 return irhs2;
2995 else
2996 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2997 goto and_ior_xor;
2999 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3000 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3001 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3003 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3004 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3005 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3006 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3008 gimple tstmt;
3009 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3010 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3011 tstmt = stmts->pop ();
3012 gcc_assert (tstmt == def_stmt);
3013 stmts->quick_push (stmt);
3014 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3015 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3016 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3017 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3018 return irhs1;
3020 else
3021 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3022 goto and_ior_xor;
3024 /* FALLTHRU */
3025 case BIT_IOR_EXPR:
3026 case BIT_XOR_EXPR:
3027 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3028 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3029 and_ior_xor:
3030 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3031 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3033 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3034 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3035 int out_prec = TYPE_PRECISION (out_type);
3036 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3037 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3038 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3039 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3040 else
3042 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3043 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3046 itype = TREE_TYPE (irhs1);
3047 pattern_stmt
3048 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3049 rhs_code, irhs1, irhs2);
3050 break;
3052 default:
3053 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3054 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3055 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3056 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3057 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3059 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3060 itype
3061 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3063 else
3064 itype = TREE_TYPE (rhs1);
3065 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3066 if (trueval == NULL_TREE)
3067 trueval = build_int_cst (itype, 1);
3068 else
3069 gcc_checking_assert (useless_type_conversion_p (itype,
3070 TREE_TYPE (trueval)));
3071 pattern_stmt
3072 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3073 COND_EXPR, cond_expr, trueval,
3074 build_int_cst (itype, 0));
3075 break;
3078 stmts->safe_push (stmt);
3079 gimple_set_location (pattern_stmt, loc);
3080 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3081 return gimple_assign_lhs (pattern_stmt);
3085 /* Function vect_recog_bool_pattern
3087 Try to find pattern like following:
3089 bool a_b, b_b, c_b, d_b, e_b;
3090 TYPE f_T;
3091 loop:
3092 S1 a_b = x1 CMP1 y1;
3093 S2 b_b = x2 CMP2 y2;
3094 S3 c_b = a_b & b_b;
3095 S4 d_b = x3 CMP3 y3;
3096 S5 e_b = c_b | d_b;
3097 S6 f_T = (TYPE) e_b;
3099 where type 'TYPE' is an integral type. Or a similar pattern
3100 ending in
3102 S6 f_Y = e_b ? r_Y : s_Y;
3104 as results from if-conversion of a complex condition.
3106 Input:
3108 * LAST_STMT: A stmt at the end from which the pattern
3109 search begins, i.e. cast of a bool to
3110 an integer type.
3112 Output:
3114 * TYPE_IN: The type of the input arguments to the pattern.
3116 * TYPE_OUT: The type of the output of this pattern.
3118 * Return value: A new stmt that will be used to replace the pattern.
3120 Assuming size of TYPE is the same as size of all comparisons
3121 (otherwise some casts would be added where needed), the above
3122 sequence we create related pattern stmts:
3123 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3124 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3125 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3126 S5' e_T = c_T | d_T;
3127 S6' f_T = e_T;
3129 Instead of the above S3' we could emit:
3130 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3131 S3' c_T = a_T | b_T;
3132 but the above is more efficient. */
3134 static gimple
3135 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
3136 tree *type_out)
3138 gimple last_stmt = stmts->pop ();
3139 enum tree_code rhs_code;
3140 tree var, lhs, rhs, vectype;
3141 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3142 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3143 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
3144 gimple pattern_stmt;
/* Only plain assignments can terminate a bool pattern.  */
3146 if (!is_gimple_assign (last_stmt))
3147 return NULL;
3149 var = gimple_assign_rhs1 (last_stmt);
3150 lhs = gimple_assign_lhs (last_stmt);
/* VAR must have boolean semantics: either BOOLEAN_TYPE or an
   unsigned single-bit integral type.  */
3152 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3153 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3154 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3155 return NULL;
3157 rhs_code = gimple_assign_rhs_code (last_stmt);
/* Case 1 (see the function comment): S6  f_T = (TYPE) e_b;
   a cast of a bool to an integral type wider than one bit.  */
3158 if (CONVERT_EXPR_CODE_P (rhs_code))
3160 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3161 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3162 return NULL;
3163 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3164 if (vectype == NULL_TREE)
3165 return NULL;
/* Verify the whole bool computation feeding VAR can be rewritten
   into integer arithmetic before committing to the pattern.  */
3167 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3168 return NULL;
3170 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3171 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
/* Emit a plain copy when the rewritten RHS already has the right
   type, otherwise a conversion.  */
3172 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3173 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3174 else
3175 pattern_stmt
3176 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3177 *type_out = vectype;
3178 *type_in = vectype;
3179 stmts->safe_push (last_stmt);
3180 if (dump_enabled_p ())
3181 dump_printf_loc (MSG_NOTE, vect_location,
3182 "vect_recog_bool_pattern: detected:\n");
3184 return pattern_stmt;
/* Case 2: S6  f_Y = e_b ? r_Y : s_Y;
   a bool used as the condition of a COND_EXPR.  */
3186 else if (rhs_code == COND_EXPR
3187 && TREE_CODE (var) == SSA_NAME)
3189 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3190 if (vectype == NULL_TREE)
3191 return NULL;
3193 /* Build a scalar type for the boolean result that when
3194 vectorized matches the vector type of the result in
3195 size and number of elements. */
3196 unsigned prec
3197 = wi::udiv_trunc (TYPE_SIZE (vectype),
3198 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3199 tree type
3200 = build_nonstandard_integer_type (prec,
3201 TYPE_UNSIGNED (TREE_TYPE (var)));
3202 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3203 return NULL;
3205 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3206 return NULL;
3208 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3209 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
/* Replace the bool condition by "RHS != 0" computed on the
   rewritten integer value.  */
3210 pattern_stmt
3211 = gimple_build_assign (lhs, COND_EXPR,
3212 build2 (NE_EXPR, boolean_type_node,
3213 rhs, build_int_cst (type, 0)),
3214 gimple_assign_rhs2 (last_stmt),
3215 gimple_assign_rhs3 (last_stmt));
3216 *type_out = vectype;
3217 *type_in = vectype;
3218 stmts->safe_push (last_stmt);
3219 if (dump_enabled_p ())
3220 dump_printf_loc (MSG_NOTE, vect_location,
3221 "vect_recog_bool_pattern: detected:\n");
3223 return pattern_stmt;
/* Case 3: a bool SSA name stored through a data reference.
   Rewrite the stored value as an integer of the vector element
   type and transfer the data reference to the pattern stmt.  */
3225 else if (rhs_code == SSA_NAME
3226 && STMT_VINFO_DATA_REF (stmt_vinfo))
3228 stmt_vec_info pattern_stmt_info;
3229 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3230 gcc_assert (vectype != NULL_TREE);
3231 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3232 return NULL;
3233 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3234 return NULL;
3236 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
/* View the original LHS as the vector element type so the store
   can be vectorized with VECTYPE.  */
3237 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
/* If the rewritten RHS has a different type, convert it in a
   pattern def-seq stmt first.  */
3238 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3240 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3241 gimple cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3242 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3243 rhs = rhs2;
3245 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3246 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3247 bb_vinfo);
3248 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
/* Copy all data-reference fields from the original stmt to the
   pattern stmt, and make the DR itself point at the pattern stmt.  */
3249 STMT_VINFO_DATA_REF (pattern_stmt_info)
3250 = STMT_VINFO_DATA_REF (stmt_vinfo);
3251 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3252 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3253 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3254 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3255 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3256 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3257 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3258 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3259 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3260 *type_out = vectype;
3261 *type_in = vectype;
3262 stmts->safe_push (last_stmt);
3263 if (dump_enabled_p ())
3264 dump_printf_loc (MSG_NOTE, vect_location,
3265 "vect_recog_bool_pattern: detected:\n");
3266 return pattern_stmt;
3268 else
3269 return NULL;
3273 /* Mark statements that are involved in a pattern. */
3275 static inline void
3276 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3277 tree pattern_vectype)
3279 stmt_vec_info pattern_stmt_info, def_stmt_info;
3280 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3281 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
3282 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
3283 gimple def_stmt;
/* Create a stmt_vec_info for PATTERN_STMT unless the pattern
   recognizer already made one (as the bool-store case does).  */
3285 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3286 if (pattern_stmt_info == NULL)
3288 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3289 bb_vinfo);
3290 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3292 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
/* Cross-link the original and pattern stmts through their
   RELATED_STMT fields, mark the original as part of a pattern, and
   hand the pattern def sequence over to the pattern stmt.  */
3294 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3295 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3296 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3297 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3298 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3299 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3300 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3301 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
/* Give every stmt of the pattern def sequence a stmt_vec_info too,
   placed in ORIG_STMT's basic block.  */
3302 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3304 gimple_stmt_iterator si;
3305 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3306 !gsi_end_p (si); gsi_next (&si))
3308 def_stmt = gsi_stmt (si);
3309 def_stmt_info = vinfo_for_stmt (def_stmt);
3310 if (def_stmt_info == NULL)
3312 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3313 bb_vinfo);
3314 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3316 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3317 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3318 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
/* Default the def-seq stmt's vectype to the pattern's vectype
   unless the recognizer set one explicitly.  */
3319 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3320 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3325 /* Function vect_pattern_recog_1
3327 Input:
3328 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3329 computation pattern.
3330 STMT: A stmt from which the pattern search should start.
3332 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3333 expression that computes the same functionality and can be used to
3334 replace the sequence of stmts that are involved in the pattern.
3336 Output:
3337 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3338 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3339 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3340 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3341 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3342 to the available target pattern.
3344 This function also does some bookkeeping, as explained in the documentation
3345 for vect_pattern_recog. */
3347 static void
3348 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3349 gimple_stmt_iterator si,
3350 vec<gimple> *stmts_to_replace)
3352 gimple stmt = gsi_stmt (si), pattern_stmt;
3353 stmt_vec_info stmt_info;
3354 loop_vec_info loop_vinfo;
3355 tree pattern_vectype;
3356 tree type_in, type_out;
3357 enum tree_code code;
3358 int i;
3359 gimple next;
/* Run the recognizer on STMT; it may push additional stmts onto
   STMTS_TO_REPLACE besides the initial one.  */
3361 stmts_to_replace->truncate (0);
3362 stmts_to_replace->quick_push (stmt);
3363 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3364 if (!pattern_stmt)
3365 return;
/* The last stmt pushed by the recognizer is the one actually being
   replaced by PATTERN_STMT.  */
3367 stmt = stmts_to_replace->last ();
3368 stmt_info = vinfo_for_stmt (stmt);
3369 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3371 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3373 /* No need to check target support (already checked by the pattern
3374 recognition function). */
3375 pattern_vectype = type_out ? type_out : type_in;
3377 else
3379 machine_mode vec_mode;
3380 enum insn_code icode;
3381 optab optab;
3383 /* Check target support */
3384 type_in = get_vectype_for_scalar_type (type_in);
3385 if (!type_in)
3386 return;
3387 if (type_out)
3388 type_out = get_vectype_for_scalar_type (type_out);
3389 else
3390 type_out = type_in;
3391 if (!type_out)
3392 return;
3393 pattern_vectype = type_out;
3395 if (is_gimple_assign (pattern_stmt))
3396 code = gimple_assign_rhs_code (pattern_stmt);
3397 else
3399 gcc_assert (is_gimple_call (pattern_stmt));
3400 code = CALL_EXPR;
/* Reject the pattern if the target has no instruction for CODE on
   the input vector mode, or if that instruction's result mode does
   not match TYPE_OUT's mode.  */
3403 optab = optab_for_tree_code (code, type_in, optab_default);
3404 vec_mode = TYPE_MODE (type_in);
3405 if (!optab
3406 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3407 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3408 return;
3411 /* Found a vectorizable pattern. */
3412 if (dump_enabled_p ())
3414 dump_printf_loc (MSG_NOTE, vect_location,
3415 "pattern recognized: ");
3416 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3417 dump_printf (MSG_NOTE, "\n");
3420 /* Mark the stmts that are involved in the pattern. */
3421 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3423 /* Patterns cannot be vectorized using SLP, because they change the order of
3424 computation. */
3425 if (loop_vinfo)
3426 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3427 if (next == stmt)
3428 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3430 /* It is possible that additional pattern stmts are created and inserted in
3431 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3432 relevant statements. */
3433 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3434 && (unsigned) i < (stmts_to_replace->length () - 1);
3435 i++)
3437 stmt_info = vinfo_for_stmt (stmt);
3438 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3439 if (dump_enabled_p ())
3441 dump_printf_loc (MSG_NOTE, vect_location,
3442 "additional pattern stmt: ");
3443 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3444 dump_printf (MSG_NOTE, "\n");
3447 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3452 /* Function vect_pattern_recog
3454 Input:
3455 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3456 computation idioms.
3458 Output - for each computation idiom that is detected we create a new stmt
3459 that provides the same functionality and that can be vectorized. We
3460 also record some information in the struct_stmt_info of the relevant
3461 stmts, as explained below:
3463 At the entry to this function we have the following stmts, with the
3464 following initial value in the STMT_VINFO fields:
3466 stmt in_pattern_p related_stmt vec_stmt
3467 S1: a_i = .... - - -
3468 S2: a_2 = ..use(a_i).. - - -
3469 S3: a_1 = ..use(a_2).. - - -
3470 S4: a_0 = ..use(a_1).. - - -
3471 S5: ... = ..use(a_0).. - - -
3473 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3474 represented by a single stmt. We then:
3475 - create a new stmt S6 equivalent to the pattern (the stmt is not
3476 inserted into the code)
3477 - fill in the STMT_VINFO fields as follows:
3479 in_pattern_p related_stmt vec_stmt
3480 S1: a_i = .... - - -
3481 S2: a_2 = ..use(a_i).. - - -
3482 S3: a_1 = ..use(a_2).. - - -
3483 S4: a_0 = ..use(a_1).. true S6 -
3484 '---> S6: a_new = .... - S4 -
3485 S5: ... = ..use(a_0).. - - -
3487 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3488 to each other through the RELATED_STMT field).
3490 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3491 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3492 remain irrelevant unless used by stmts other than S4.
3494 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3495 (because they are marked as irrelevant). It will vectorize S6, and record
3496 a pointer to the new vector stmt VS6 from S6 (as usual).
3497 S4 will be skipped, and S5 will be vectorized as usual:
3499 in_pattern_p related_stmt vec_stmt
3500 S1: a_i = .... - - -
3501 S2: a_2 = ..use(a_i).. - - -
3502 S3: a_1 = ..use(a_2).. - - -
3503 > VS6: va_new = .... - - -
3504 S4: a_0 = ..use(a_1).. true S6 VS6
3505 '---> S6: a_new = .... - S4 VS6
3506 > VS5: ... = ..vuse(va_new).. - - -
3507 S5: ... = ..use(a_0).. - - -
3509 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3510 elsewhere), and we'll end up with:
3512 VS6: va_new = ....
3513 VS5: ... = ..vuse(va_new)..
3515 In case of more than one pattern statements, e.g., widen-mult with
3516 intermediate type:
3518 S1 a_t = ;
3519 S2 a_T = (TYPE) a_t;
3520 '--> S3: a_it = (interm_type) a_t;
3521 S4 prod_T = a_T * CONST;
3522 '--> S5: prod_T' = a_it w* CONST;
3524 there may be other users of a_T outside the pattern. In that case S2 will
3525 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3526 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3527 be recorded in S3. */
3529 void
3530 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3532 struct loop *loop;
3533 basic_block *bbs;
3534 unsigned int nbbs;
3535 gimple_stmt_iterator si;
3536 unsigned int i, j;
3537 vect_recog_func_ptr vect_recog_func;
3538 auto_vec<gimple, 1> stmts_to_replace;
3539 gimple stmt;
3541 if (dump_enabled_p ())
3542 dump_printf_loc (MSG_NOTE, vect_location,
3543 "=== vect_pattern_recog ===\n");
3545 if (loop_vinfo)
3547 loop = LOOP_VINFO_LOOP (loop_vinfo);
3548 bbs = LOOP_VINFO_BBS (loop_vinfo);
3549 nbbs = loop->num_nodes;
3551 else
3553 bbs = &BB_VINFO_BB (bb_vinfo);
3554 nbbs = 1;
3557 /* Scan through the loop stmts, applying the pattern recognition
3558 functions starting at each stmt visited: */
3559 for (i = 0; i < nbbs; i++)
3561 basic_block bb = bbs[i];
3562 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3564 if (bb_vinfo && (stmt = gsi_stmt (si))
3565 && vinfo_for_stmt (stmt)
3566 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3567 continue;
3569 /* Scan over all generic vect_recog_xxx_pattern functions. */
3570 for (j = 0; j < NUM_PATTERNS; j++)
3572 vect_recog_func = vect_vect_recog_func_ptrs[j];
3573 vect_pattern_recog_1 (vect_recog_func, si,
3574 &stmts_to_replace);