/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "cfgloop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
#include "builtins.h"

/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
				      tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
					      tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}

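/* Editorial illustration (added in this revision; not part of the GCC
   sources): given a use like

     short s_t;
     int i_T = (int) s_t;    // DEF_STMT: i_T = NOP (s_t)

   type_conversion_p returns true with *ORIG_TYPE == short and
   *PROMOTION == true, since int has at least twice the precision of
   short.  For a same-width conversion such as
   unsigned short u_t = (unsigned short) s_t; it still returns true
   (when CHECK_SIGN is false) but with *PROMOTION == false.  */
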
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
	return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
	return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
	{
	  stmt = def_stmt;
	  oprnd0 = gimple_assign_rhs1 (stmt);
	}
      else
	half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
	return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
	return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
	return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

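/* Editorial illustration (added here; not from the GCC sources): a
   scalar loop of the shape this recognizer matches, with 'type' =
   short and TYPE1 = TYPE2 = int:

     short x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += x[i] * y[i];

   The widen/multiply/accumulate chain S3-S7 collapses into
   DOT_PROD_EXPR <x_t, y_t, sum_0>, which targets implement via the
   sdot_prod/udot_prod optabs.  */
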
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */

static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
	return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
			     &half_type, &def_stmt, &promotion))
	plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
	half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
			  &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
			  &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					     sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

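/* Editorial illustration (added here; not from the GCC sources): the
   motivating use is sum-of-absolute-differences in video codecs, with
   'type' = unsigned char and TYPE2 = int:

     unsigned char x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += __builtin_abs (x[i] - y[i]);

   The subtract/abs/accumulate chain S3-S8 collapses into
   SAD_EXPR <x_t, y_t, sum_0>, implemented by the usad/ssad optabs
   (e.g. PSADBW on x86).  */
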
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */

static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       gimple *wstmt, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
	|| (code == LSHIFT_EXPR
	    && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	       != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}

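/* Editorial illustration (added here; not from the GCC sources): for

     unsigned char a_t;
     unsigned int res_T = (unsigned int) a_t * 500;

   the constant 500 does not fit HALF_TYPE (unsigned char), but TYPE is
   4 times bigger, so NEW_TYPE becomes unsigned short, which 500 does
   fit.  *WSTMT then receives  a_it = (unsigned short) a_t;  and the
   caller can emit a short -> int widening multiplication instead.  */
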
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.  */

static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree* oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

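/* Editorial illustration (added here; not from the GCC sources): the
   basic shape this recognizer matches is

     short a[256], b[256];
     int prod[256];
     for (int i = 0; i < 256; i++)
       prod[i] = (int) a[i] * (int) b[i];

   which becomes WIDEN_MULT_EXPR <a_t, b_t>, vectorized through the
   vec_widen_smult_{lo,hi} (or _even/_odd) optabs so that each vector
   multiply consumes a full vector of the narrow type.  */
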
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
     or
	x = sqrt (x)
*/

static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gcall *stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}

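/* Editorial illustration (added here; not from the GCC sources):

     double x[256], y[256];
     for (int i = 0; i < 256; i++)
       x[i] = pow (y[i], 2.0);    // rewritten as x[i] = y[i] * y[i]

   and pow (y[i], 0.5) is rewritten as sqrt (y[i]) when the target can
   vectorize sqrt; neither form needs a vectorized pow.  */
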
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}

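/* Editorial illustration (added here; not from the GCC sources): the
   canonical widening summation, with 'type' = unsigned char and
   TYPE = unsigned int:

     unsigned char x[256];
     unsigned int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += x[i];    // sum_1 = (unsigned int) x[i] + sum_0

   The cast-and-add pair S2/S3 collapses into
   WIDEN_SUM_EXPR <x_t, sum_0>, backed by the widen_ssum/widen_usum
   optabs.  */
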
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type (
		TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

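/* Editorial illustration (added here; not from the GCC sources): for

     unsigned char c_t;
     int t_T = ((int) c_t) >> 2;

   the RSHIFT_EXPR case above accepts HALF_TYPE = unsigned char when
   the target supports a vector shift on chars; otherwise it retries
   with INTERM_TYPE = unsigned short, so the operation is still done on
   a type narrower than 'int'.  */
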
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */

static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 case we associate the last pattern statement with the last statement
	 in the sequence.  Therefore, we only add the original statement to
	 the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}

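/* Editorial illustration (added here; not from the GCC sources): the
   typical over-widening input comes from C's integer promotions:

     unsigned char in[256], out[256];
     for (int i = 0; i < 256; i++)
       out[i] = (in[i] << 1) >> 2;    // S2-S5: done in int, demoted back

   The recognizer re-expresses the shifts on unsigned char/short so a
   vector holds a full complement of narrow elements instead of one
   int per input char.  */
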
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */

static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  gimple wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

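/* Editorial illustration (added here; not from the GCC sources):

     short a[256];
     int res[256];
     for (int i = 0; i < 256; i++)
       res[i] = (int) a[i] << 8;

   becomes WIDEN_LSHIFT_EXPR <a_t, 8>, mapped to the
   vec_widen_sshiftl_{lo,hi} optabs (e.g. SSHLL/USHLL-style widening
   shifts on ARM NEON / AArch64).  */
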
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

  where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */

static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1971 || integer_zerop (def))
1972 return NULL;
1973 def2 = build_int_cst (stype,
1974 GET_MODE_PRECISION (TYPE_MODE (type))
1975 - tree_to_uhwi (def));
1977 else
1979 tree vecstype = get_vectype_for_scalar_type (stype);
1980 stmt_vec_info def_stmt_vinfo;
1982 if (vecstype == NULL_TREE)
1983 return NULL;
1984 def2 = vect_recog_temp_ssa_var (stype, NULL);
1985 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1986 if (ext_def)
1988 basic_block new_bb
1989 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1990 gcc_assert (!new_bb);
1992 else
1994 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1995 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1996 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1997 append_pattern_def_seq (stmt_vinfo, def_stmt);
2000 def2 = vect_recog_temp_ssa_var (stype, NULL);
2001 tree mask
2002 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
2003 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2004 gimple_assign_lhs (def_stmt), mask);
2005 if (ext_def)
2007 basic_block new_bb
2008 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2009 gcc_assert (!new_bb);
2011 else
2013 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2014 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2015 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2016 append_pattern_def_seq (stmt_vinfo, def_stmt);
2020 var1 = vect_recog_temp_ssa_var (type, NULL);
2021 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2022 ? LSHIFT_EXPR : RSHIFT_EXPR,
2023 oprnd0, def);
2024 append_pattern_def_seq (stmt_vinfo, def_stmt);
2026 var2 = vect_recog_temp_ssa_var (type, NULL);
2027 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2028 ? RSHIFT_EXPR : LSHIFT_EXPR,
2029 oprnd0, def2);
2030 append_pattern_def_seq (stmt_vinfo, def_stmt);
2032 /* Pattern detected. */
2033 if (dump_enabled_p ())
2034 dump_printf_loc (MSG_NOTE, vect_location,
2035 "vect_recog_rotate_pattern: detected:\n");
2037 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2038 var = vect_recog_temp_ssa_var (type, NULL);
2039 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2041 if (dump_enabled_p ())
2042 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2044 stmts->safe_push (last_stmt);
2045 return pattern_stmt;
2048 /* Detect a vector by vector shift pattern that wouldn't otherwise
2049 be vectorized:
2051 type a_t;
2052 TYPE b_T, res_T;
2054 S1 a_t = ;
2055 S2 b_T = ;
2056 S3 res_T = b_T op a_t;
2058 where type 'TYPE' is a type with a different size than 'type',
2059 and op is <<, >> or rotate.
2061 Also detect cases:
2063 type a_t;
2064 TYPE b_T, c_T, res_T;
2066 S0 c_T = ;
2067 S1 a_t = (type) c_T;
2068 S2 b_T = ;
2069 S3 res_T = b_T op a_t;
2071 Input/Output:
2073 * STMTS: Contains a stmt from which the pattern search begins,
2074 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2075 with a shift/rotate which has the same type on both operands, in the
2076 second case just b_T op c_T, in the first case with added cast
2077 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2079 Output:
2081 * TYPE_IN: The type of the input arguments to the pattern.
2083 * TYPE_OUT: The type of the output of this pattern.
2085 * Return value: A new stmt that will be used to replace the shift/rotate
2086 S3 stmt. */
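/* For example (a sketch with illustrative types):

     long long b_T, res_T;
     char a_t;
     ...
     res_T = b_T << a_t;

   is rewritten as

     long long a_T = (long long) a_t;
     res_T = b_T << a_T;

   so that both operands of the shift have the same (vector) type.  */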
2088 static gimple
2089 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
2090 tree *type_in, tree *type_out)
2092 gimple last_stmt = stmts->pop ();
2093 tree oprnd0, oprnd1, lhs, var;
2094 gimple pattern_stmt, def_stmt;
2095 enum tree_code rhs_code;
2096 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2097 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2098 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2099 enum vect_def_type dt;
2100 tree def;
2102 if (!is_gimple_assign (last_stmt))
2103 return NULL;
2105 rhs_code = gimple_assign_rhs_code (last_stmt);
2106 switch (rhs_code)
2108 case LSHIFT_EXPR:
2109 case RSHIFT_EXPR:
2110 case LROTATE_EXPR:
2111 case RROTATE_EXPR:
2112 break;
2113 default:
2114 return NULL;
2117 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2118 return NULL;
2120 lhs = gimple_assign_lhs (last_stmt);
2121 oprnd0 = gimple_assign_rhs1 (last_stmt);
2122 oprnd1 = gimple_assign_rhs2 (last_stmt);
2123 if (TREE_CODE (oprnd0) != SSA_NAME
2124 || TREE_CODE (oprnd1) != SSA_NAME
2125 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2126 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2127 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2128 || TYPE_PRECISION (TREE_TYPE (lhs))
2129 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2130 return NULL;
2132 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
2133 &def, &dt))
2134 return NULL;
2136 if (dt != vect_internal_def)
2137 return NULL;
2139 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2140 *type_out = *type_in;
2141 if (*type_in == NULL_TREE)
2142 return NULL;
2144 def = NULL_TREE;
2145 if (gimple_assign_cast_p (def_stmt))
2147 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2148 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2149 && TYPE_PRECISION (TREE_TYPE (rhs1))
2150 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2151 def = rhs1;
2154 if (def == NULL_TREE)
2156 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2157 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2158 new_pattern_def_seq (stmt_vinfo, def_stmt);
2161 /* Pattern detected. */
2162 if (dump_enabled_p ())
2163 dump_printf_loc (MSG_NOTE, vect_location,
2164 "vect_recog_vector_vector_shift_pattern: detected:\n");
2166 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2167 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2168 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2170 if (dump_enabled_p ())
2171 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2173 stmts->safe_push (last_stmt);
2174 return pattern_stmt;
2177 /* Detect a signed division by a constant that wouldn't otherwise
2178 be vectorized:
2180 type a_t, b_t;
2182 S1 a_t = b_t / N;
2184 where type 'type' is an integral type and N is a constant.
2186 Similarly handle modulo by a constant:
2188 S4 a_t = b_t % N;
2190 Input/Output:
2192 * STMTS: Contains a stmt from which the pattern search begins,
2193 i.e. the division stmt. If N is a power of two constant and
2194 type is signed, S1 is replaced by:
2195 S3 y_t = b_t < 0 ? N - 1 : 0;
2196 S2 x_t = b_t + y_t;
2197 S1' a_t = x_t >> log2 (N);
2199 Similarly, if N is a power of two constant and type is signed,
2200 S4 is replaced by (where *_T temporaries have unsigned type):
2201 S9 y_T = b_t < 0 ? -1U : 0U;
2202 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2203 S7 z_t = (type) z_T;
2204 S6 w_t = b_t + z_t;
2205 S5 x_t = w_t & (N - 1);
2206 S4' a_t = x_t - z_t;
2208 Output:
2210 * TYPE_IN: The type of the input arguments to the pattern.
2212 * TYPE_OUT: The type of the output of this pattern.
2214 * Return value: A new stmt that will be used to replace the division
2215 S1 or modulo S4 stmt. */
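/* A scalar sketch of both power-of-two rewrites for N == 8 and a
   32-bit signed type (names are illustrative only):

     division:
       int y_t = b_t < 0 ? 7 : 0;                 S3: N - 1
       int x_t = b_t + y_t;                       S2
       int a_t = x_t >> 3;                        S1': log2 (N)

     modulo:
       unsigned int y_T = b_t < 0 ? -1U : 0U;     S9
       unsigned int z_T = y_T >> (32 - 3);        S8
       int z_t = (int) z_T;                       S7
       int w_t = b_t + z_t;                       S6
       int x_t = w_t & 7;                         S5: N - 1
       int a_t = x_t - z_t;                       S4'  */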
2217 static gimple
2218 vect_recog_divmod_pattern (vec<gimple> *stmts,
2219 tree *type_in, tree *type_out)
2221 gimple last_stmt = stmts->pop ();
2222 tree oprnd0, oprnd1, vectype, itype, cond;
2223 gimple pattern_stmt, def_stmt;
2224 enum tree_code rhs_code;
2225 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2226 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2227 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2228 optab optab;
2229 tree q;
2230 int dummy_int, prec;
2231 stmt_vec_info def_stmt_vinfo;
2233 if (!is_gimple_assign (last_stmt))
2234 return NULL;
2236 rhs_code = gimple_assign_rhs_code (last_stmt);
2237 switch (rhs_code)
2239 case TRUNC_DIV_EXPR:
2240 case TRUNC_MOD_EXPR:
2241 break;
2242 default:
2243 return NULL;
2246 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2247 return NULL;
2249 oprnd0 = gimple_assign_rhs1 (last_stmt);
2250 oprnd1 = gimple_assign_rhs2 (last_stmt);
2251 itype = TREE_TYPE (oprnd0);
2252 if (TREE_CODE (oprnd0) != SSA_NAME
2253 || TREE_CODE (oprnd1) != INTEGER_CST
2254 || TREE_CODE (itype) != INTEGER_TYPE
2255 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2256 return NULL;
2258 vectype = get_vectype_for_scalar_type (itype);
2259 if (vectype == NULL_TREE)
2260 return NULL;
2262 /* If the target can handle vectorized division or modulo natively,
2263 don't attempt to optimize this. */
2264 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2265 if (optab != unknown_optab)
2267 machine_mode vec_mode = TYPE_MODE (vectype);
2268 int icode = (int) optab_handler (optab, vec_mode);
2269 if (icode != CODE_FOR_nothing)
2270 return NULL;
2273 prec = TYPE_PRECISION (itype);
2274 if (integer_pow2p (oprnd1))
2276 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2277 return NULL;
2279 /* Pattern detected. */
2280 if (dump_enabled_p ())
2281 dump_printf_loc (MSG_NOTE, vect_location,
2282 "vect_recog_divmod_pattern: detected:\n");
2284 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2285 build_int_cst (itype, 0));
2286 if (rhs_code == TRUNC_DIV_EXPR)
2288 tree var = vect_recog_temp_ssa_var (itype, NULL);
2289 tree shift;
2290 def_stmt
2291 = gimple_build_assign (var, COND_EXPR, cond,
2292 fold_build2 (MINUS_EXPR, itype, oprnd1,
2293 build_int_cst (itype, 1)),
2294 build_int_cst (itype, 0));
2295 new_pattern_def_seq (stmt_vinfo, def_stmt);
2296 var = vect_recog_temp_ssa_var (itype, NULL);
2297 def_stmt
2298 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2299 gimple_assign_lhs (def_stmt));
2300 append_pattern_def_seq (stmt_vinfo, def_stmt);
2302 shift = build_int_cst (itype, tree_log2 (oprnd1));
2303 pattern_stmt
2304 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2305 RSHIFT_EXPR, var, shift);
2307 else
2309 tree signmask;
2310 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2311 if (compare_tree_int (oprnd1, 2) == 0)
2313 signmask = vect_recog_temp_ssa_var (itype, NULL);
2314 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2315 build_int_cst (itype, 1),
2316 build_int_cst (itype, 0));
2317 append_pattern_def_seq (stmt_vinfo, def_stmt);
2319 else
2321 tree utype
2322 = build_nonstandard_integer_type (prec, 1);
2323 tree vecutype = get_vectype_for_scalar_type (utype);
2324 tree shift
2325 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2326 - tree_log2 (oprnd1));
2327 tree var = vect_recog_temp_ssa_var (utype, NULL);
2329 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2330 build_int_cst (utype, -1),
2331 build_int_cst (utype, 0));
2332 def_stmt_vinfo
2333 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2334 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2335 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2336 append_pattern_def_seq (stmt_vinfo, def_stmt);
2337 var = vect_recog_temp_ssa_var (utype, NULL);
2338 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2339 gimple_assign_lhs (def_stmt),
2340 shift);
2341 def_stmt_vinfo
2342 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2343 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2344 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2345 append_pattern_def_seq (stmt_vinfo, def_stmt);
2346 signmask = vect_recog_temp_ssa_var (itype, NULL);
2347 def_stmt
2348 = gimple_build_assign (signmask, NOP_EXPR, var);
2349 append_pattern_def_seq (stmt_vinfo, def_stmt);
2351 def_stmt
2352 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2353 PLUS_EXPR, oprnd0, signmask);
2354 append_pattern_def_seq (stmt_vinfo, def_stmt);
2355 def_stmt
2356 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2357 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2358 fold_build2 (MINUS_EXPR, itype, oprnd1,
2359 build_int_cst (itype, 1)));
2360 append_pattern_def_seq (stmt_vinfo, def_stmt);
2362 pattern_stmt
2363 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2364 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2365 signmask);
2368 if (dump_enabled_p ())
2369 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2370 0);
2372 stmts->safe_push (last_stmt);
2374 *type_in = vectype;
2375 *type_out = vectype;
2376 return pattern_stmt;
2379 if (prec > HOST_BITS_PER_WIDE_INT
2380 || integer_zerop (oprnd1))
2381 return NULL;
2383 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2384 return NULL;
2386 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2388 if (TYPE_UNSIGNED (itype))
2390 unsigned HOST_WIDE_INT mh, ml;
2391 int pre_shift, post_shift;
2392 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2393 & GET_MODE_MASK (TYPE_MODE (itype)));
2394 tree t1, t2, t3, t4;
2396 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2397 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2398 return NULL;
2400 /* Find a suitable multiplier and right shift count
2401 instead of multiplying with D. */
2402 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2404 /* If the suggested multiplier is more than SIZE bits, we can do better
2405 for even divisors, using an initial right shift. */
2406 if (mh != 0 && (d & 1) == 0)
2408 pre_shift = floor_log2 (d & -d);
2409 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2410 &ml, &post_shift, &dummy_int);
2411 gcc_assert (!mh);
2413 else
2414 pre_shift = 0;
2416 if (mh != 0)
2418 if (post_shift - 1 >= prec)
2419 return NULL;
2421 /* t1 = oprnd0 h* ml;
2422 t2 = oprnd0 - t1;
2423 t3 = t2 >> 1;
2424 t4 = t1 + t3;
2425 q = t4 >> (post_shift - 1); */
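/* As a worked example, unsigned 32-bit division by 7 is expected to take
   this path with ml == 0x24924925 and post_shift == 3, i.e.
   q = (t1 + ((oprnd0 - t1) >> 1)) >> 2 with t1 = oprnd0 h* 0x24924925.  */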
2426 t1 = vect_recog_temp_ssa_var (itype, NULL);
2427 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2428 build_int_cst (itype, ml));
2429 append_pattern_def_seq (stmt_vinfo, def_stmt);
2431 t2 = vect_recog_temp_ssa_var (itype, NULL);
2432 def_stmt
2433 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2434 append_pattern_def_seq (stmt_vinfo, def_stmt);
2436 t3 = vect_recog_temp_ssa_var (itype, NULL);
2437 def_stmt
2438 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2439 append_pattern_def_seq (stmt_vinfo, def_stmt);
2441 t4 = vect_recog_temp_ssa_var (itype, NULL);
2442 def_stmt
2443 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2445 if (post_shift != 1)
2447 append_pattern_def_seq (stmt_vinfo, def_stmt);
2449 q = vect_recog_temp_ssa_var (itype, NULL);
2450 pattern_stmt
2451 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2452 build_int_cst (itype, post_shift - 1));
2454 else
2456 q = t4;
2457 pattern_stmt = def_stmt;
2460 else
2462 if (pre_shift >= prec || post_shift >= prec)
2463 return NULL;
2465 /* t1 = oprnd0 >> pre_shift;
2466 t2 = t1 h* ml;
2467 q = t2 >> post_shift; */
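/* As a worked example, unsigned 32-bit division by 10 is expected to
   give pre_shift == 0, ml == 0xcccccccd and post_shift == 3, i.e.
   q = (oprnd0 h* 0xcccccccd) >> 3.  */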
2468 if (pre_shift)
2470 t1 = vect_recog_temp_ssa_var (itype, NULL);
2471 def_stmt
2472 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2473 build_int_cst (itype, pre_shift));
2474 append_pattern_def_seq (stmt_vinfo, def_stmt);
2476 else
2477 t1 = oprnd0;
2479 t2 = vect_recog_temp_ssa_var (itype, NULL);
2480 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2481 build_int_cst (itype, ml));
2483 if (post_shift)
2485 append_pattern_def_seq (stmt_vinfo, def_stmt);
2487 q = vect_recog_temp_ssa_var (itype, NULL);
2488 def_stmt
2489 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2490 build_int_cst (itype, post_shift));
2492 else
2493 q = t2;
2495 pattern_stmt = def_stmt;
2498 else
2500 unsigned HOST_WIDE_INT ml;
2501 int post_shift;
2502 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2503 unsigned HOST_WIDE_INT abs_d;
2504 bool add = false;
2505 tree t1, t2, t3, t4;
2507 /* Give up for -1. */
2508 if (d == -1)
2509 return NULL;
2511 /* Since d might be INT_MIN, we have to cast to
2512 unsigned HOST_WIDE_INT before negating to avoid
2513 undefined signed overflow. */
2514 abs_d = (d >= 0
2515 ? (unsigned HOST_WIDE_INT) d
2516 : - (unsigned HOST_WIDE_INT) d);
2518 /* n rem d = n rem -d */
2519 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2521 d = abs_d;
2522 oprnd1 = build_int_cst (itype, abs_d);
2524 else if (HOST_BITS_PER_WIDE_INT >= prec
2525 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2526 /* This case is not handled correctly below. */
2527 return NULL;
2529 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2530 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2532 add = true;
2533 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2535 if (post_shift >= prec)
2536 return NULL;
2538 /* t1 = oprnd0 h* ml; */
2539 t1 = vect_recog_temp_ssa_var (itype, NULL);
2540 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2541 build_int_cst (itype, ml));
2543 if (add)
2545 /* t2 = t1 + oprnd0; */
2546 append_pattern_def_seq (stmt_vinfo, def_stmt);
2547 t2 = vect_recog_temp_ssa_var (itype, NULL);
2548 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2550 else
2551 t2 = t1;
2553 if (post_shift)
2555 /* t3 = t2 >> post_shift; */
2556 append_pattern_def_seq (stmt_vinfo, def_stmt);
2557 t3 = vect_recog_temp_ssa_var (itype, NULL);
2558 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2559 build_int_cst (itype, post_shift));
2561 else
2562 t3 = t2;
2564 wide_int oprnd0_min, oprnd0_max;
2565 int msb = 1;
2566 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2568 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2569 msb = 0;
2570 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2571 msb = -1;
2574 if (msb == 0 && d >= 0)
2576 /* q = t3; */
2577 q = t3;
2578 pattern_stmt = def_stmt;
2580 else
2582 /* t4 = oprnd0 >> (prec - 1);
2583 or if we know from VRP that oprnd0 >= 0
2584 t4 = 0;
2585 or if we know from VRP that oprnd0 < 0
2586 t4 = -1; */
2587 append_pattern_def_seq (stmt_vinfo, def_stmt);
2588 t4 = vect_recog_temp_ssa_var (itype, NULL);
2589 if (msb != 1)
2590 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2591 build_int_cst (itype, msb));
2592 else
2593 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2594 build_int_cst (itype, prec - 1));
2595 append_pattern_def_seq (stmt_vinfo, def_stmt);
2597 /* q = t3 - t4; or q = t4 - t3; */
2598 q = vect_recog_temp_ssa_var (itype, NULL);
2599 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2600 d < 0 ? t3 : t4);
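/* As a worked example, signed 32-bit division by 3 is expected to give
   ml == 0x55555556 with post_shift == 0, i.e.
   q = (oprnd0 h* 0x55555556) - (oprnd0 >> 31).  */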
2604 if (rhs_code == TRUNC_MOD_EXPR)
2606 tree r, t1;
2608 /* We divided. Now finish by:
2609 t1 = q * oprnd1;
2610 r = oprnd0 - t1; */
2611 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2613 t1 = vect_recog_temp_ssa_var (itype, NULL);
2614 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2615 append_pattern_def_seq (stmt_vinfo, def_stmt);
2617 r = vect_recog_temp_ssa_var (itype, NULL);
2618 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2621 /* Pattern detected. */
2622 if (dump_enabled_p ())
2624 dump_printf_loc (MSG_NOTE, vect_location,
2625 "vect_recog_divmod_pattern: detected: ");
2626 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2627 dump_printf (MSG_NOTE, "\n");
2630 stmts->safe_push (last_stmt);
2632 *type_in = vectype;
2633 *type_out = vectype;
2634 return pattern_stmt;
2637 /* Function vect_recog_mixed_size_cond_pattern
2639 Try to find the following pattern:
2641 type x_t, y_t;
2642 TYPE a_T, b_T, c_T;
2643 loop:
2644 S1 a_T = x_t CMP y_t ? b_T : c_T;
2646 where type 'TYPE' is an integral type which has a different size
2647 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2648 than 'type', the constants need to fit into an integer type
2649 with the same width as 'type') or results of conversion from 'type'.
2651 Input:
2653 * LAST_STMT: A stmt from which the pattern search begins.
2655 Output:
2657 * TYPE_IN: The type of the input arguments to the pattern.
2659 * TYPE_OUT: The type of the output of this pattern.
2661 * Return value: A new stmt that will be used to replace the pattern.
2662 Additionally a def_stmt is added.
2664 a_it = x_t CMP y_t ? b_it : c_it;
2665 a_T = (TYPE) a_it; */
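/* For example (a sketch with illustrative types):

     short x_t, y_t;
     int a_T;
     ...
     a_T = x_t < y_t ? 17 : 0;

   becomes

     short a_it = x_t < y_t ? 17 : 0;
     int a_T = (int) a_it;

   so the COND_EXPR is evaluated in the comparison's width and only its
   result is widened.  */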
2667 static gimple
2668 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2669 tree *type_out)
2671 gimple last_stmt = (*stmts)[0];
2672 tree cond_expr, then_clause, else_clause;
2673 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2674 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2675 machine_mode cmpmode;
2676 gimple pattern_stmt, def_stmt;
2677 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2678 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2679 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2680 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2681 bool promotion;
2682 tree comp_scalar_type;
2684 if (!is_gimple_assign (last_stmt)
2685 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2686 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2687 return NULL;
2689 cond_expr = gimple_assign_rhs1 (last_stmt);
2690 then_clause = gimple_assign_rhs2 (last_stmt);
2691 else_clause = gimple_assign_rhs3 (last_stmt);
2693 if (!COMPARISON_CLASS_P (cond_expr))
2694 return NULL;
2696 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2697 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2698 if (comp_vectype == NULL_TREE)
2699 return NULL;
2701 type = gimple_expr_type (last_stmt);
2702 if (types_compatible_p (type, comp_scalar_type)
2703 || ((TREE_CODE (then_clause) != INTEGER_CST
2704 || TREE_CODE (else_clause) != INTEGER_CST)
2705 && !INTEGRAL_TYPE_P (comp_scalar_type))
2706 || !INTEGRAL_TYPE_P (type))
2707 return NULL;
2709 if ((TREE_CODE (then_clause) != INTEGER_CST
2710 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2711 &def_stmt0, &promotion))
2712 || (TREE_CODE (else_clause) != INTEGER_CST
2713 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2714 &def_stmt1, &promotion)))
2715 return NULL;
2717 if (orig_type0 && orig_type1
2718 && !types_compatible_p (orig_type0, orig_type1))
2719 return NULL;
2721 if (orig_type0)
2723 if (!types_compatible_p (orig_type0, comp_scalar_type))
2724 return NULL;
2725 then_clause = gimple_assign_rhs1 (def_stmt0);
2726 itype = orig_type0;
2729 if (orig_type1)
2731 if (!types_compatible_p (orig_type1, comp_scalar_type))
2732 return NULL;
2733 else_clause = gimple_assign_rhs1 (def_stmt1);
2734 itype = orig_type1;
2737 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2739 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2740 return NULL;
2742 vectype = get_vectype_for_scalar_type (type);
2743 if (vectype == NULL_TREE)
2744 return NULL;
2746 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2747 return NULL;
2749 if (itype == NULL_TREE)
2750 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2751 TYPE_UNSIGNED (type));
2753 if (itype == NULL_TREE
2754 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2755 return NULL;
2757 vecitype = get_vectype_for_scalar_type (itype);
2758 if (vecitype == NULL_TREE)
2759 return NULL;
2761 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2762 return NULL;
2764 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2766 if ((TREE_CODE (then_clause) == INTEGER_CST
2767 && !int_fits_type_p (then_clause, itype))
2768 || (TREE_CODE (else_clause) == INTEGER_CST
2769 && !int_fits_type_p (else_clause, itype)))
2770 return NULL;
2773 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2774 COND_EXPR, unshare_expr (cond_expr),
2775 fold_convert (itype, then_clause),
2776 fold_convert (itype, else_clause));
2777 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2778 NOP_EXPR, gimple_assign_lhs (def_stmt));
2780 new_pattern_def_seq (stmt_vinfo, def_stmt);
2781 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2782 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2783 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2784 *type_in = vecitype;
2785 *type_out = vectype;
2787 if (dump_enabled_p ())
2788 dump_printf_loc (MSG_NOTE, vect_location,
2789 "vect_recog_mixed_size_cond_pattern: detected:\n");
2791 return pattern_stmt;
2795 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
2796 true if bool VAR can be optimized that way. */
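/* For example, VAR defined by

     a_b = x1 < y1;
     b_b = x2 != y2;
     var = a_b & b_b;

   is accepted provided each comparison can be carried out as a
   VEC_COND_EXPR on the target.  */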
2798 static bool
2799 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2801 gimple def_stmt;
2802 enum vect_def_type dt;
2803 tree def, rhs1;
2804 enum tree_code rhs_code;
2806 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2807 &dt))
2808 return false;
2810 if (dt != vect_internal_def)
2811 return false;
2813 if (!is_gimple_assign (def_stmt))
2814 return false;
2816 if (!has_single_use (def))
2817 return false;
2819 rhs1 = gimple_assign_rhs1 (def_stmt);
2820 rhs_code = gimple_assign_rhs_code (def_stmt);
2821 switch (rhs_code)
2823 case SSA_NAME:
2824 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2826 CASE_CONVERT:
2827 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2828 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2829 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2830 return false;
2831 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2833 case BIT_NOT_EXPR:
2834 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2836 case BIT_AND_EXPR:
2837 case BIT_IOR_EXPR:
2838 case BIT_XOR_EXPR:
2839 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2840 return false;
2841 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2842 bb_vinfo);
2844 default:
2845 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2847 tree vecitype, comp_vectype;
2849 /* If the comparison can throw, then is_gimple_condexpr will be
2850 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2851 if (stmt_could_throw_p (def_stmt))
2852 return false;
2854 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2855 if (comp_vectype == NULL_TREE)
2856 return false;
2858 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2860 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2861 tree itype
2862 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2863 vecitype = get_vectype_for_scalar_type (itype);
2864 if (vecitype == NULL_TREE)
2865 return false;
2867 else
2868 vecitype = comp_vectype;
2869 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2871 return false;
2876 /* Helper function of adjust_bool_pattern. Add a cast to TYPE of the result
2877 of a previous stmt (SSA_NAME_DEF_STMT of VAR) by moving its COND_EXPR from
2878 RELATED_STMT to PATTERN_DEF_SEQ and adding the cast as the new RELATED_STMT. */
2880 static tree
2881 adjust_bool_pattern_cast (tree type, tree var)
2883 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2884 gimple cast_stmt, pattern_stmt;
2886 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2887 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2888 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2889 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2890 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2891 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2892 return gimple_assign_lhs (cast_stmt);
2896 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2897 recursively. VAR is an SSA_NAME that should be transformed from bool
2898 to a wider integer type, OUT_TYPE is the desired final integer type of
2899 the whole pattern, TRUEVAL should be NULL unless optimizing
2900 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2901 in the then_clause, STMTS is where statements with added pattern stmts
2902 should be pushed to. */
2904 static tree
2905 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2906 vec<gimple> *stmts)
2908 gimple stmt = SSA_NAME_DEF_STMT (var);
2909 enum tree_code rhs_code, def_rhs_code;
2910 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2911 location_t loc;
2912 gimple pattern_stmt, def_stmt;
2914 rhs1 = gimple_assign_rhs1 (stmt);
2915 rhs2 = gimple_assign_rhs2 (stmt);
2916 rhs_code = gimple_assign_rhs_code (stmt);
2917 loc = gimple_location (stmt);
2918 switch (rhs_code)
2920 case SSA_NAME:
2921 CASE_CONVERT:
2922 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2923 itype = TREE_TYPE (irhs1);
2924 pattern_stmt
2925 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2926 SSA_NAME, irhs1);
2927 break;
2929 case BIT_NOT_EXPR:
2930 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2931 itype = TREE_TYPE (irhs1);
2932 pattern_stmt
2933 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2934 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
2935 break;
2937 case BIT_AND_EXPR:
2938 /* Try to optimize x = y & (a < b ? 1 : 0); into
2939 x = (a < b ? y : 0);
2941 E.g. for:
2942 bool a_b, b_b, c_b;
2943 TYPE d_T;
2945 S1 a_b = x1 CMP1 y1;
2946 S2 b_b = x2 CMP2 y2;
2947 S3 c_b = a_b & b_b;
2948 S4 d_T = (TYPE) c_b;
2950 we would normally emit:
2952 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2953 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2954 S3' c_T = a_T & b_T;
2955 S4' d_T = c_T;
2957 but we can save one stmt by using the
2958 result of one of the COND_EXPRs in the other COND_EXPR and leave
2959 the BIT_AND_EXPR stmt out:
2961 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2962 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2963 S4' d_T = c_T;
2965 At least when VEC_COND_EXPR is implemented using masks,
2966 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2967 computes the comparison mask and ANDs it, in one case with an
2968 all-ones vector, in the other case with a vector register.
2969 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2970 often more expensive. */
2971 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2972 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2973 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2975 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2976 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2977 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2978 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2980 gimple tstmt;
2981 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2982 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2983 tstmt = stmts->pop ();
2984 gcc_assert (tstmt == def_stmt);
2985 stmts->quick_push (stmt);
2986 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2987 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2988 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2989 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2990 return irhs2;
2992 else
2993 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2994 goto and_ior_xor;
2996 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2997 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2998 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3000 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3001 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3002 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3003 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3005 gimple tstmt;
3006 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3007 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3008 tstmt = stmts->pop ();
3009 gcc_assert (tstmt == def_stmt);
3010 stmts->quick_push (stmt);
3011 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3012 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3013 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3014 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3015 return irhs1;
3017 else
3018 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3019 goto and_ior_xor;
3021 /* FALLTHRU */
3022 case BIT_IOR_EXPR:
3023 case BIT_XOR_EXPR:
3024 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3025 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3026 and_ior_xor:
3027 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3028 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3030 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3031 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3032 int out_prec = TYPE_PRECISION (out_type);
3033 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3034 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3035 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3036 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3037 else
3039 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3040 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3043 itype = TREE_TYPE (irhs1);
3044 pattern_stmt
3045 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3046 rhs_code, irhs1, irhs2);
3047 break;
3049 default:
3050 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3051 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3052 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3053 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3054 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3056 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3057 itype
3058 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3060 else
3061 itype = TREE_TYPE (rhs1);
3062 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3063 if (trueval == NULL_TREE)
3064 trueval = build_int_cst (itype, 1);
3065 else
3066 gcc_checking_assert (useless_type_conversion_p (itype,
3067 TREE_TYPE (trueval)));
3068 pattern_stmt
3069 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3070 COND_EXPR, cond_expr, trueval,
3071 build_int_cst (itype, 0));
3072 break;
3075 stmts->safe_push (stmt);
3076 gimple_set_location (pattern_stmt, loc);
3077 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3078 return gimple_assign_lhs (pattern_stmt);
3082 /* Function vect_recog_bool_pattern
3084 Try to find pattern like following:
3086 bool a_b, b_b, c_b, d_b, e_b;
3087 TYPE f_T;
3088 loop:
3089 S1 a_b = x1 CMP1 y1;
3090 S2 b_b = x2 CMP2 y2;
3091 S3 c_b = a_b & b_b;
3092 S4 d_b = x3 CMP3 y3;
3093 S5 e_b = c_b | d_b;
3094 S6 f_T = (TYPE) e_b;
3096 where type 'TYPE' is an integral type. Or a similar pattern
3097 ending in
3099 S6 f_Y = e_b ? r_Y : s_Y;
3101 as results from if-conversion of a complex condition.
3103 Input:
3105 * LAST_STMT: A stmt at the end from which the pattern
3106 search begins, i.e. cast of a bool to
3107 an integer type.
3109 Output:
3111 * TYPE_IN: The type of the input arguments to the pattern.
3113 * TYPE_OUT: The type of the output of this pattern.
3115 * Return value: A new stmt that will be used to replace the pattern.
3117 Assuming the size of TYPE is the same as the size of all comparisons
3118 (otherwise some casts would be added where needed), for the above
3119 sequence we create related pattern stmts:
3120 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3121 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3122 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3123 S5' e_T = c_T | d_T;
3124 S6' f_T = e_T;
3126 Instead of the above S3' we could emit:
3127 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3128 S3' c_T = a_T | b_T;
3129 but the above is more efficient. */
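/* For example, S1-S6 typically result from if-conversion of source like
   (a sketch):

     _Bool a_b = x1 < y1;
     _Bool b_b = x2 == y2;
     _Bool c_b = a_b & b_b;
     _Bool d_b = x3 > y3;
     _Bool e_b = c_b | d_b;
     int f_T = e_b;

   which the transformation above turns into the integer COND_EXPR
   sequence S1'-S6'.  */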
3131 static gimple
3132 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
3133 tree *type_out)
3135 gimple last_stmt = stmts->pop ();
3136 enum tree_code rhs_code;
3137 tree var, lhs, rhs, vectype;
3138 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3139 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3140 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
3141 gimple pattern_stmt;
3143 if (!is_gimple_assign (last_stmt))
3144 return NULL;
3146 var = gimple_assign_rhs1 (last_stmt);
3147 lhs = gimple_assign_lhs (last_stmt);
3149 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3150 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3151 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3152 return NULL;
3154 rhs_code = gimple_assign_rhs_code (last_stmt);
3155 if (CONVERT_EXPR_CODE_P (rhs_code))
3157 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3158 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3159 return NULL;
3160 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3161 if (vectype == NULL_TREE)
3162 return NULL;
3164 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3165 return NULL;
3167 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3168 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3169 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3170 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3171 else
3172 pattern_stmt
3173 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3174 *type_out = vectype;
3175 *type_in = vectype;
3176 stmts->safe_push (last_stmt);
3177 if (dump_enabled_p ())
3178 dump_printf_loc (MSG_NOTE, vect_location,
3179 "vect_recog_bool_pattern: detected:\n");
3181 return pattern_stmt;
3183 else if (rhs_code == COND_EXPR
3184 && TREE_CODE (var) == SSA_NAME)
3186 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3187 if (vectype == NULL_TREE)
3188 return NULL;
3190 /* Build a scalar type for the boolean result that when
3191 vectorized matches the vector type of the result in
3192 size and number of elements. */
3193 unsigned prec
3194 = wi::udiv_trunc (TYPE_SIZE (vectype),
3195 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3196 tree type
3197 = build_nonstandard_integer_type (prec,
3198 TYPE_UNSIGNED (TREE_TYPE (var)));
3199 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3200 return NULL;
3202 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3203 return NULL;
3205 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3206 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3207 pattern_stmt
3208 = gimple_build_assign (lhs, COND_EXPR,
3209 build2 (NE_EXPR, boolean_type_node,
3210 rhs, build_int_cst (type, 0)),
3211 gimple_assign_rhs2 (last_stmt),
3212 gimple_assign_rhs3 (last_stmt));
3213 *type_out = vectype;
3214 *type_in = vectype;
3215 stmts->safe_push (last_stmt);
3216 if (dump_enabled_p ())
3217 dump_printf_loc (MSG_NOTE, vect_location,
3218 "vect_recog_bool_pattern: detected:\n");
3220 return pattern_stmt;
3222 else if (rhs_code == SSA_NAME
3223 && STMT_VINFO_DATA_REF (stmt_vinfo))
3225 stmt_vec_info pattern_stmt_info;
3226 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3227 gcc_assert (vectype != NULL_TREE);
3228 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3229 return NULL;
3230 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3231 return NULL;
3233 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3234 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3235 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3237 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3238 gimple cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3239 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3240 rhs = rhs2;
3242 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3243 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3244 bb_vinfo);
3245 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3246 STMT_VINFO_DATA_REF (pattern_stmt_info)
3247 = STMT_VINFO_DATA_REF (stmt_vinfo);
3248 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3249 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3250 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3251 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3252 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3253 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3254 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3255 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3256 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3257 *type_out = vectype;
3258 *type_in = vectype;
3259 stmts->safe_push (last_stmt);
3260 if (dump_enabled_p ())
3261 dump_printf_loc (MSG_NOTE, vect_location,
3262 "vect_recog_bool_pattern: detected:\n");
3263 return pattern_stmt;
3265 else
3266 return NULL;
3270 /* Mark statements that are involved in a pattern. */
3272 static inline void
3273 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3274 tree pattern_vectype)
3276 stmt_vec_info pattern_stmt_info, def_stmt_info;
3277 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3278 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
3279 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
3280 gimple def_stmt;
3282 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3283 if (pattern_stmt_info == NULL)
3285 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3286 bb_vinfo);
3287 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3289 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3291 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3292 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3293 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3294 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3295 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3296 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3297 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3298 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3299 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3301 gimple_stmt_iterator si;
3302 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3303 !gsi_end_p (si); gsi_next (&si))
3305 def_stmt = gsi_stmt (si);
3306 def_stmt_info = vinfo_for_stmt (def_stmt);
3307 if (def_stmt_info == NULL)
3309 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3310 bb_vinfo);
3311 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3313 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3314 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3315 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3316 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3317 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3322 /* Function vect_pattern_recog_1
3324 Input:
3325 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3326 computation pattern.
3327 STMT: A stmt from which the pattern search should start.
3329 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3330 expression that provides the same functionality and can be used to
3331 replace the sequence of stmts that are involved in the pattern.
3333 Output:
3334 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3335 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3336 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3337 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3338 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
3339 fits the available target pattern.
3341 This function also does some bookkeeping, as explained in the documentation
3342 for vect_pattern_recog. */
3344 static void
3345 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3346 gimple_stmt_iterator si,
3347 vec<gimple> *stmts_to_replace)
3349 gimple stmt = gsi_stmt (si), pattern_stmt;
3350 stmt_vec_info stmt_info;
3351 loop_vec_info loop_vinfo;
3352 tree pattern_vectype;
3353 tree type_in, type_out;
3354 enum tree_code code;
3355 int i;
3356 gimple next;
3358 stmts_to_replace->truncate (0);
3359 stmts_to_replace->quick_push (stmt);
3360 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3361 if (!pattern_stmt)
3362 return;
3364 stmt = stmts_to_replace->last ();
3365 stmt_info = vinfo_for_stmt (stmt);
3366 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3368 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3370 /* No need to check target support (already checked by the pattern
3371 recognition function). */
3372 pattern_vectype = type_out ? type_out : type_in;
3374 else
3376 machine_mode vec_mode;
3377 enum insn_code icode;
3378 optab optab;
3380 /* Check target support */
3381 type_in = get_vectype_for_scalar_type (type_in);
3382 if (!type_in)
3383 return;
3384 if (type_out)
3385 type_out = get_vectype_for_scalar_type (type_out);
3386 else
3387 type_out = type_in;
3388 if (!type_out)
3389 return;
3390 pattern_vectype = type_out;
3392 if (is_gimple_assign (pattern_stmt))
3393 code = gimple_assign_rhs_code (pattern_stmt);
3394 else
3396 gcc_assert (is_gimple_call (pattern_stmt));
3397 code = CALL_EXPR;
3400 optab = optab_for_tree_code (code, type_in, optab_default);
3401 vec_mode = TYPE_MODE (type_in);
3402 if (!optab
3403 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3404 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3405 return;
3408 /* Found a vectorizable pattern. */
3409 if (dump_enabled_p ())
3411 dump_printf_loc (MSG_NOTE, vect_location,
3412 "pattern recognized: ");
3413 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3416 /* Mark the stmts that are involved in the pattern. */
3417 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3419 /* Patterns cannot be vectorized using SLP, because they change the order of
3420 computation. */
3421 if (loop_vinfo)
3422 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3423 if (next == stmt)
3424 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3426 /* It is possible that additional pattern stmts are created and inserted in
3427 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3428 relevant statements. */
3429 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3430 && (unsigned) i < (stmts_to_replace->length () - 1);
3431 i++)
3433 stmt_info = vinfo_for_stmt (stmt);
3434 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3435 if (dump_enabled_p ())
3437 dump_printf_loc (MSG_NOTE, vect_location,
3438 "additional pattern stmt: ");
3439 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3442 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3447 /* Function vect_pattern_recog
3449 Input:
3450 LOOP_VINFO - the loop_vec_info of a loop in which we want to look for
3451 computation idioms.
3453 Output - for each computation idiom that is detected we create a new stmt
3454 that provides the same functionality and that can be vectorized. We
3455 also record some information in the stmt_vec_info of the relevant
3456 stmts, as explained below:
3458 At the entry to this function we have the following stmts, with the
3459 following initial value in the STMT_VINFO fields:
3461 stmt                      in_pattern_p  related_stmt  vec_stmt
3462 S1: a_i = ....                 -             -            -
3463 S2: a_2 = ..use(a_i)..         -             -            -
3464 S3: a_1 = ..use(a_2)..         -             -            -
3465 S4: a_0 = ..use(a_1)..         -             -            -
3466 S5: ... = ..use(a_0)..         -             -            -
3468 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3469 represented by a single stmt. We then:
3470 - create a new stmt S6 equivalent to the pattern (the stmt is not
3471 inserted into the code)
3472 - fill in the STMT_VINFO fields as follows:
3474                           in_pattern_p  related_stmt  vec_stmt
3475 S1: a_i = ....                 -             -            -
3476 S2: a_2 = ..use(a_i)..         -             -            -
3477 S3: a_1 = ..use(a_2)..         -             -            -
3478 S4: a_0 = ..use(a_1)..        true           S6           -
3479 '---> S6: a_new = ....         -             S4           -
3480 S5: ... = ..use(a_0)..         -             -            -
3482 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3483 to each other through the RELATED_STMT field).
3485 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3486 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3487 remain irrelevant unless used by stmts other than S4.
3489 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3490 (because they are marked as irrelevant). It will vectorize S6, and record
3491 a pointer to the new vector stmt VS6 from S6 (as usual).
3492 S4 will be skipped, and S5 will be vectorized as usual:
3494                           in_pattern_p  related_stmt  vec_stmt
3495 S1: a_i = ....                 -             -            -
3496 S2: a_2 = ..use(a_i)..         -             -            -
3497 S3: a_1 = ..use(a_2)..         -             -            -
3498 > VS6: va_new = ....           -             -            -
3499 S4: a_0 = ..use(a_1)..        true           S6          VS6
3500 '---> S6: a_new = ....         -             S4          VS6
3501 > VS5: ... = ..vuse(va_new)..  -             -            -
3502 S5: ... = ..use(a_0)..         -             -            -
3504 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3505 elsewhere), and we'll end up with:
3507 VS6: va_new = ....
3508 VS5: ... = ..vuse(va_new)..
3510 In case of more than one pattern statements, e.g., widen-mult with
3511 intermediate type:
3513 S1 a_t = ;
3514 S2 a_T = (TYPE) a_t;
3515 '--> S3: a_it = (interm_type) a_t;
3516 S4 prod_T = a_T * CONST;
3517 '--> S5: prod_T' = a_it w* CONST;
3519 there may be other users of a_T outside the pattern. In that case S2 will
3520 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3521 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3522 be recorded in S3. */
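/* For the widen-mult case above, the source might look like (a sketch):

     short a_t = ...;
     int a_T = a_t;            S2, with further users of a_T
     int prod_T = a_T * 23;    S4

   S3/S5 then carry out the multiplication in the intermediate type,
   while S2 is kept for the remaining users of a_T.  */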
3524 void
3525 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3527 struct loop *loop;
3528 basic_block *bbs;
3529 unsigned int nbbs;
3530 gimple_stmt_iterator si;
3531 unsigned int i, j;
3532 vect_recog_func_ptr vect_recog_func;
3533 auto_vec<gimple, 1> stmts_to_replace;
3534 gimple stmt;
3536 if (dump_enabled_p ())
3537 dump_printf_loc (MSG_NOTE, vect_location,
3538 "=== vect_pattern_recog ===\n");
3540 if (loop_vinfo)
3542 loop = LOOP_VINFO_LOOP (loop_vinfo);
3543 bbs = LOOP_VINFO_BBS (loop_vinfo);
3544 nbbs = loop->num_nodes;
3546 else
3548 bbs = &BB_VINFO_BB (bb_vinfo);
3549 nbbs = 1;
3552 /* Scan through the loop stmts, applying the pattern recognition
3553 functions starting at each stmt visited: */
3554 for (i = 0; i < nbbs; i++)
3556 basic_block bb = bbs[i];
3557 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3559 if (bb_vinfo && (stmt = gsi_stmt (si))
3560 && vinfo_for_stmt (stmt)
3561 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3562 continue;
3564 /* Scan over all generic vect_recog_xxx_pattern functions. */
3565 for (j = 0; j < NUM_PATTERNS; j++)
3567 vect_recog_func = vect_vect_recog_func_ptrs[j];
3568 vect_pattern_recog_1 (vect_recog_func, si,
3569 &stmts_to_replace);