/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "cfgloop.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
#include "builtins.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
				      tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
					      tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_sad_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_rotate_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
	return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
	  || gimple_code (stmt2) == GIMPLE_PHI)
	return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
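/* For instance (an illustrative sketch, not part of the original file;
   assumes a 16-bit short and a 32-bit int):

     short s_t;
     int i_T;
     DEF_STMT: i_T = (int) s_t;

   For NAME i_T this returns true with *ORIG_TYPE set to short and
   *PROMOTION set to true, since int is at least twice as wide as short.  */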
static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
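/* Example: a source loop that this recognizer matches (an illustrative
   sketch, not part of the original file; assumes 16-bit short inputs and
   a 32-bit int accumulator, so TYPE1 is exactly double the input size):

     int
     dot_prod (short *x, short *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x[i] * y[i];      // S3/S4 widen, S5 multiply, S7 accumulate
       return sum;
     }

   The widen-multiply-accumulate chain is replaced by
   DOT_PROD_EXPR <x[i], y[i], sum>.  */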
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
					       oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>  */
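/* Example: a source loop that this recognizer matches (an illustrative
   sketch, not part of the original file; assumes unsigned char inputs
   and an int accumulator, as in the common uint8 SAD kernels):

     int
     sad (unsigned char *x, unsigned char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         {
           int diff = x[i] - y[i];            // S3/S4 widen, S5 subtract
           sum += diff >= 0 ? diff : -diff;   // S6 ABS_EXPR, S8 accumulate
         }
       return sum;
     }

   The widen-subtract-abs-accumulate chain is replaced by
   SAD_EXPR <x[i], y[i], sum>.  */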
static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          [DAD = (TYPE2) DAD;  #optional]
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign_with_ops
			  (SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
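/* For example (an illustrative sketch, not part of the original file):
   with an 8-bit a_t and a 32-bit TYPE,

     a_T = (TYPE) a_t;
     res_T = a_T << 9;

   the shift amount 9 exceeds the 8-bit HALF_TYPE but fits the 16-bit
   intermediate type, so the promotion is rewritten as
   a_it = (interm_type) a_t and the widening shift is performed from the
   16-bit type to TYPE.  */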
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
	  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
	return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.  */
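/* Example: a source loop that this recognizer matches (an illustrative
   sketch, not part of the original file; assumes 16-bit shorts and
   32-bit ints):

     void
     widen_mult (short *a, short *b, int *c, int n)
     {
       for (int i = 0; i < n; i++)
         c[i] = a[i] * b[i];      // S3/S4 promote, S5 multiply
     }

   The two promotions and the multiplication are replaced by
   WIDEN_MULT <a[i], b[i]>.  */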
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, stmts, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      tree *oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
					       old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt
	= gimple_build_assign_with_ops (NOP_EXPR,
					vect_recog_temp_ssa_var (type, NULL),
					gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)  */
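/* For example (an illustrative sketch, not part of the original file):

     x = pow (y, 2.0);   is rewritten as   x = y * y;
     x = pow (y, 0.5);   is rewritten as   x = sqrt (y);

   both of which have vector equivalents, whereas a general pow call
   does not.  */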
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
			tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gcall *stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
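/* Example: a source loop that this recognizer matches (an illustrative
   sketch, not part of the original file; assumes unsigned short elements
   and an unsigned int accumulator):

     unsigned int
     widen_sum (unsigned short *p, int n)
     {
       unsigned int sum = 0;
       for (int i = 0; i < n; i++)
         sum += p[i];      // S1 load, S2 promote, S3 accumulate
       return sum;
     }

   The promote-and-add is replaced by WIDEN_SUM <p[i], sum>.  */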
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
					       oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
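/* For instance (an illustrative sketch, not part of the original file):
   given an 8-bit unsigned x_t that was promoted to a 32-bit TYPE,

     x_T = (TYPE) x_t;
     res_T = x_T & 250;

   the constant 250 fits the 8-bit type, so NEW_TYPE is the 8-bit type
   and the BIT_AND_EXPR can be performed on it directly, making the
   promotion unnecessary.  */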
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
        {
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
            return false;

          interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))
            return false;
        }

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
         (e.g., if the original value was char, the shift amount is at most 8
         if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
        return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
        break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
        return false;

      interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
        return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

	      stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type, NULL);
	      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       oprnd);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type, NULL);
	  new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						   oprnd);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0 and C1
   are constants.  Check if S3 and S4 can be done on a smaller type than
   'TYPE': it can either be 'type' or some intermediate type.  For now, we
   expect S5 to be a type demotion operation.  We also check that S3 and S4
   have only one use.  */
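/* Example: a sequence that this recognizer matches (an illustrative
   sketch, not part of the original file; assumes unsigned char elements
   that C integer promotion over-widens to int):

     void
     over_widen (unsigned char *p, unsigned char *q, int n)
     {
       for (int i = 0; i < n; i++)
         q[i] = (p[i] << 1) >> 2;   // ops done in int, result demoted
     }

   Both shifts can be performed on a 16-bit intermediate type instead of
   the 32-bit int.  */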
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type, NULL);
	  pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
						       var);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
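/* Example: a source loop that this recognizer matches (an illustrative
   sketch, not part of the original file; assumes 16-bit shorts widened
   to 32-bit ints before the shift):

     void
     widen_shift (unsigned short *a, unsigned int *b, int n)
     {
       for (int i = 0; i < n; i++)
         b[i] = a[i] << 8;      // S2 promote, S3 shift
     }

   The promotion and the shift are replaced by
   WIDEN_LSHIFT_EXPR <a[i], 8>.  */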
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, stmts,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
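/* For example (an illustrative sketch, not part of the original file):
   for a 32-bit unsigned b_t rotated left by c_t, the replacement
   sequence computes

     e_t = -c_t & 31;
     a_t = (b_t << c_t) | (b_t >> e_t);

   Masking the negated shift count keeps the right-shift amount within
   [0, 31], so the expansion stays well defined even when c_t is 0.  */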
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;
1974 if (vecstype == NULL_TREE)
1975 return NULL;
1976 def2 = vect_recog_temp_ssa_var (stype, NULL);
1977 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def);
1978 if (ext_def)
1980 basic_block new_bb
1981 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1982 gcc_assert (!new_bb);
1984 else
1986 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1987 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1988 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1989 append_pattern_def_seq (stmt_vinfo, def_stmt);
1992 def2 = vect_recog_temp_ssa_var (stype, NULL);
1993 tree mask
1994 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1995 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1996 gimple_assign_lhs (def_stmt),
1997 mask);
1998 if (ext_def)
2000 basic_block new_bb
2001 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2002 gcc_assert (!new_bb);
2004 else
2006 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2007 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2008 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2009 append_pattern_def_seq (stmt_vinfo, def_stmt);
2013 var1 = vect_recog_temp_ssa_var (type, NULL);
2014 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
2015 ? LSHIFT_EXPR : RSHIFT_EXPR,
2016 var1, oprnd0, def);
2017 append_pattern_def_seq (stmt_vinfo, def_stmt);
2019 var2 = vect_recog_temp_ssa_var (type, NULL);
2020 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
2021 ? RSHIFT_EXPR : LSHIFT_EXPR,
2022 var2, oprnd0, def2);
2023 append_pattern_def_seq (stmt_vinfo, def_stmt);
2025 /* Pattern detected. */
2026 if (dump_enabled_p ())
2027 dump_printf_loc (MSG_NOTE, vect_location,
2028 "vect_recog_rotate_pattern: detected:\n");
2030 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2031 var = vect_recog_temp_ssa_var (type, NULL);
2032 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
2034 if (dump_enabled_p ())
2035 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2037 stmts->safe_push (last_stmt);
2038 return pattern_stmt;
2041 /* Detect a vector-by-vector shift pattern that wouldn't otherwise be
2042 vectorized:
2044 type a_t;
2045 TYPE b_T, res_T;
2047 S1 a_t = ;
2048 S2 b_T = ;
2049 S3 res_T = b_T op a_t;
2051 where type 'TYPE' is a type with different size than 'type',
2052 and op is <<, >> or rotate.
2054 Also detect cases:
2056 type a_t;
2057 TYPE b_T, c_T, res_T;
2059 S0 c_T = ;
2060 S1 a_t = (type) c_T;
2061 S2 b_T = ;
2062 S3 res_T = b_T op a_t;
2064 Input/Output:
2066 * STMTS: Contains a stmt from which the pattern search begins,
2067 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2068 with a shift/rotate which has same type on both operands, in the
2069 second case just b_T op c_T, in the first case with added cast
2070 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2072 Output:
2074 * TYPE_IN: The type of the input arguments to the pattern.
2076 * TYPE_OUT: The type of the output of this pattern.
2078 * Return value: A new stmt that will be used to replace the shift/rotate
2079 S3 stmt. */
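/* Illustrative example (not taken from the GCC sources): in

     void
     vecvec_shift (long long *a, const int *s, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = a[i] << s[i];
     }

   the shift count s[i] is narrower than the shifted value, so TYPE and
   'type' differ; the pattern adds a cast of the count to TYPE, giving
   a shift with one common vector type that a vector-by-vector shift
   instruction can implement.  */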
2081 static gimple
2082 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
2083 tree *type_in, tree *type_out)
2085 gimple last_stmt = stmts->pop ();
2086 tree oprnd0, oprnd1, lhs, var;
2087 gimple pattern_stmt, def_stmt;
2088 enum tree_code rhs_code;
2089 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2090 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2091 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2092 enum vect_def_type dt;
2093 tree def;
2095 if (!is_gimple_assign (last_stmt))
2096 return NULL;
2098 rhs_code = gimple_assign_rhs_code (last_stmt);
2099 switch (rhs_code)
2101 case LSHIFT_EXPR:
2102 case RSHIFT_EXPR:
2103 case LROTATE_EXPR:
2104 case RROTATE_EXPR:
2105 break;
2106 default:
2107 return NULL;
2110 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2111 return NULL;
2113 lhs = gimple_assign_lhs (last_stmt);
2114 oprnd0 = gimple_assign_rhs1 (last_stmt);
2115 oprnd1 = gimple_assign_rhs2 (last_stmt);
2116 if (TREE_CODE (oprnd0) != SSA_NAME
2117 || TREE_CODE (oprnd1) != SSA_NAME
2118 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2119 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2120 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2121 || TYPE_PRECISION (TREE_TYPE (lhs))
2122 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2123 return NULL;
2125 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
2126 &def, &dt))
2127 return NULL;
2129 if (dt != vect_internal_def)
2130 return NULL;
2132 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2133 *type_out = *type_in;
2134 if (*type_in == NULL_TREE)
2135 return NULL;
2137 def = NULL_TREE;
2138 if (gimple_assign_cast_p (def_stmt))
2140 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2141 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2142 && TYPE_PRECISION (TREE_TYPE (rhs1))
2143 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2144 def = rhs1;
2147 if (def == NULL_TREE)
2149 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2150 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1);
2151 new_pattern_def_seq (stmt_vinfo, def_stmt);
2154 /* Pattern detected. */
2155 if (dump_enabled_p ())
2156 dump_printf_loc (MSG_NOTE, vect_location,
2157 "vect_recog_vector_vector_shift_pattern: detected:\n");
2159 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2160 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2161 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
2163 if (dump_enabled_p ())
2164 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2166 stmts->safe_push (last_stmt);
2167 return pattern_stmt;
2170 /* Detect a signed division by a constant that wouldn't otherwise
2171 be vectorized:
2173 type a_t, b_t;
2175 S1 a_t = b_t / N;
2177 where type 'type' is an integral type and N is a constant.
2179 Similarly handle modulo by a constant:
2181 S4 a_t = b_t % N;
2183 Input/Output:
2185 * STMTS: Contains a stmt from which the pattern search begins,
2186 i.e. the division stmt. If N is a power of two constant and the
2187 type is signed, S1 is replaced by:
2188 S3 y_t = b_t < 0 ? N - 1 : 0;
2189 S2 x_t = b_t + y_t;
2190 S1' a_t = x_t >> log2 (N);
2192 Under the same conditions (N a power of two constant, signed type),
2193 S4 is replaced by (where the *_T temporaries have unsigned type):
2194 S9 y_T = b_t < 0 ? -1U : 0U;
2195 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2196 S7 z_t = (type) z_T;
2197 S6 w_t = b_t + z_t;
2198 S5 x_t = w_t & (N - 1);
2199 S4' a_t = x_t - z_t;
2201 Output:
2203 * TYPE_IN: The type of the input arguments to the pattern.
2205 * TYPE_OUT: The type of the output of this pattern.
2207 * Return value: A new stmt that will be used to replace the division
2208 S1 or modulo S4 stmt. */
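/* Worked example (illustrative, not from the GCC sources): for N == 8
   and a signed 32-bit b_t, the division sequence becomes

     y_t = b_t < 0 ? 7 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 3;

   e.g. b_t == -9 gives x_t == -2 and a_t == -1, matching C's
   truncating -9 / 8 == -1, where a plain arithmetic shift of -9
   would give -2.  */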
2210 static gimple
2211 vect_recog_divmod_pattern (vec<gimple> *stmts,
2212 tree *type_in, tree *type_out)
2214 gimple last_stmt = stmts->pop ();
2215 tree oprnd0, oprnd1, vectype, itype, cond;
2216 gimple pattern_stmt, def_stmt;
2217 enum tree_code rhs_code;
2218 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2219 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2220 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2221 optab optab;
2222 tree q;
2223 int dummy_int, prec;
2224 stmt_vec_info def_stmt_vinfo;
2226 if (!is_gimple_assign (last_stmt))
2227 return NULL;
2229 rhs_code = gimple_assign_rhs_code (last_stmt);
2230 switch (rhs_code)
2232 case TRUNC_DIV_EXPR:
2233 case TRUNC_MOD_EXPR:
2234 break;
2235 default:
2236 return NULL;
2239 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2240 return NULL;
2242 oprnd0 = gimple_assign_rhs1 (last_stmt);
2243 oprnd1 = gimple_assign_rhs2 (last_stmt);
2244 itype = TREE_TYPE (oprnd0);
2245 if (TREE_CODE (oprnd0) != SSA_NAME
2246 || TREE_CODE (oprnd1) != INTEGER_CST
2247 || TREE_CODE (itype) != INTEGER_TYPE
2248 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2249 return NULL;
2251 vectype = get_vectype_for_scalar_type (itype);
2252 if (vectype == NULL_TREE)
2253 return NULL;
2255 /* If the target can handle vectorized division or modulo natively,
2256 don't attempt to optimize this. */
2257 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2258 if (optab != unknown_optab)
2260 machine_mode vec_mode = TYPE_MODE (vectype);
2261 int icode = (int) optab_handler (optab, vec_mode);
2262 if (icode != CODE_FOR_nothing)
2263 return NULL;
2266 prec = TYPE_PRECISION (itype);
2267 if (integer_pow2p (oprnd1))
2269 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2270 return NULL;
2272 /* Pattern detected. */
2273 if (dump_enabled_p ())
2274 dump_printf_loc (MSG_NOTE, vect_location,
2275 "vect_recog_divmod_pattern: detected:\n");
2277 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2278 build_int_cst (itype, 0));
2279 if (rhs_code == TRUNC_DIV_EXPR)
2281 tree var = vect_recog_temp_ssa_var (itype, NULL);
2282 tree shift;
2283 def_stmt
2284 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2285 fold_build2 (MINUS_EXPR, itype,
2286 oprnd1,
2287 build_int_cst (itype,
2288 1)),
2289 build_int_cst (itype, 0));
2290 new_pattern_def_seq (stmt_vinfo, def_stmt);
2291 var = vect_recog_temp_ssa_var (itype, NULL);
2292 def_stmt
2293 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
2294 gimple_assign_lhs (def_stmt));
2295 append_pattern_def_seq (stmt_vinfo, def_stmt);
2297 shift = build_int_cst (itype, tree_log2 (oprnd1));
2298 pattern_stmt
2299 = gimple_build_assign_with_ops (RSHIFT_EXPR,
2300 vect_recog_temp_ssa_var (itype,
2301 NULL),
2302 var, shift);
2304 else
2306 tree signmask;
2307 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2308 if (compare_tree_int (oprnd1, 2) == 0)
2310 signmask = vect_recog_temp_ssa_var (itype, NULL);
2311 def_stmt
2312 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
2313 build_int_cst (itype, 1),
2314 build_int_cst (itype, 0));
2315 append_pattern_def_seq (stmt_vinfo, def_stmt);
2317 else
2319 tree utype
2320 = build_nonstandard_integer_type (prec, 1);
2321 tree vecutype = get_vectype_for_scalar_type (utype);
2322 tree shift
2323 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2324 - tree_log2 (oprnd1));
2325 tree var = vect_recog_temp_ssa_var (utype, NULL);
2327 def_stmt
2328 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2329 build_int_cst (utype, -1),
2330 build_int_cst (utype, 0));
2331 def_stmt_vinfo
2332 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2333 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2334 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2335 append_pattern_def_seq (stmt_vinfo, def_stmt);
2336 var = vect_recog_temp_ssa_var (utype, NULL);
2337 def_stmt
2338 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2339 gimple_assign_lhs (def_stmt),
2340 shift);
2341 def_stmt_vinfo
2342 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2343 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2344 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2345 append_pattern_def_seq (stmt_vinfo, def_stmt);
2346 signmask = vect_recog_temp_ssa_var (itype, NULL);
2347 def_stmt
2348 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var);
2349 append_pattern_def_seq (stmt_vinfo, def_stmt);
2351 def_stmt
2352 = gimple_build_assign_with_ops (PLUS_EXPR,
2353 vect_recog_temp_ssa_var (itype,
2354 NULL),
2355 oprnd0, signmask);
2356 append_pattern_def_seq (stmt_vinfo, def_stmt);
2357 def_stmt
2358 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2359 vect_recog_temp_ssa_var (itype,
2360 NULL),
2361 gimple_assign_lhs (def_stmt),
2362 fold_build2 (MINUS_EXPR, itype,
2363 oprnd1,
2364 build_int_cst (itype,
2365 1)));
2366 append_pattern_def_seq (stmt_vinfo, def_stmt);
2368 pattern_stmt
2369 = gimple_build_assign_with_ops (MINUS_EXPR,
2370 vect_recog_temp_ssa_var (itype,
2371 NULL),
2372 gimple_assign_lhs (def_stmt),
2373 signmask);
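/* Worked example (illustrative): for N == 2 and b_t == -3 the sequence
   yields signmask == 1, then (-3 + 1) & 1 == 0 and 0 - 1 == -1,
   matching C's truncating -3 % 2 == -1.  */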
2376 if (dump_enabled_p ())
2377 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2380 stmts->safe_push (last_stmt);
2382 *type_in = vectype;
2383 *type_out = vectype;
2384 return pattern_stmt;
2387 if (prec > HOST_BITS_PER_WIDE_INT
2388 || integer_zerop (oprnd1))
2389 return NULL;
2391 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2392 return NULL;
2394 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2396 if (TYPE_UNSIGNED (itype))
2398 unsigned HOST_WIDE_INT mh, ml;
2399 int pre_shift, post_shift;
2400 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2401 & GET_MODE_MASK (TYPE_MODE (itype)));
2402 tree t1, t2, t3, t4;
2404 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2405 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2406 return NULL;
2408 /* Find a suitable multiplier and right shift count
2409 instead of dividing directly by D. */
2410 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2412 /* If the suggested multiplier is more than SIZE bits, we can do better
2413 for even divisors, using an initial right shift. */
2414 if (mh != 0 && (d & 1) == 0)
2416 pre_shift = floor_log2 (d & -d);
2417 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2418 &ml, &post_shift, &dummy_int);
2419 gcc_assert (!mh);
2421 else
2422 pre_shift = 0;
2424 if (mh != 0)
2426 if (post_shift - 1 >= prec)
2427 return NULL;
2429 /* t1 = oprnd0 h* ml;
2430 t2 = oprnd0 - t1;
2431 t3 = t2 >> 1;
2432 t4 = t1 + t3;
2433 q = t4 >> (post_shift - 1); */
2434 t1 = vect_recog_temp_ssa_var (itype, NULL);
2435 def_stmt
2436 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2437 build_int_cst (itype, ml));
2438 append_pattern_def_seq (stmt_vinfo, def_stmt);
2440 t2 = vect_recog_temp_ssa_var (itype, NULL);
2441 def_stmt
2442 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2443 append_pattern_def_seq (stmt_vinfo, def_stmt);
2445 t3 = vect_recog_temp_ssa_var (itype, NULL);
2446 def_stmt
2447 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2448 integer_one_node);
2449 append_pattern_def_seq (stmt_vinfo, def_stmt);
2451 t4 = vect_recog_temp_ssa_var (itype, NULL);
2452 def_stmt
2453 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2455 if (post_shift != 1)
2457 append_pattern_def_seq (stmt_vinfo, def_stmt);
2459 q = vect_recog_temp_ssa_var (itype, NULL);
2460 pattern_stmt
2461 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2462 build_int_cst (itype,
2463 post_shift
2464 - 1));
2466 else
2468 q = t4;
2469 pattern_stmt = def_stmt;
2472 else
2474 if (pre_shift >= prec || post_shift >= prec)
2475 return NULL;
2477 /* t1 = oprnd0 >> pre_shift;
2478 t2 = t1 h* ml;
2479 q = t2 >> post_shift; */
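/* Worked example (illustrative assumption): for prec == 32 and d == 3,
   choose_multiplier returns ml == 0xAAAAAAAB (which is (2^33 + 1) / 3)
   with pre_shift == 0 and post_shift == 1, so the emitted sequence is

     t2 = x h* 0xAAAAAAAB;
     q  = t2 >> 1;

   which equals x / 3 for every 32-bit unsigned x.  */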
2480 if (pre_shift)
2482 t1 = vect_recog_temp_ssa_var (itype, NULL);
2483 def_stmt
2484 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2485 build_int_cst (NULL,
2486 pre_shift));
2487 append_pattern_def_seq (stmt_vinfo, def_stmt);
2489 else
2490 t1 = oprnd0;
2492 t2 = vect_recog_temp_ssa_var (itype, NULL);
2493 def_stmt
2494 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2495 build_int_cst (itype, ml));
2497 if (post_shift)
2499 append_pattern_def_seq (stmt_vinfo, def_stmt);
2501 q = vect_recog_temp_ssa_var (itype, NULL);
2502 def_stmt
2503 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2504 build_int_cst (itype,
2505 post_shift));
2507 else
2508 q = t2;
2510 pattern_stmt = def_stmt;
2513 else
2515 unsigned HOST_WIDE_INT ml;
2516 int post_shift;
2517 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2518 unsigned HOST_WIDE_INT abs_d;
2519 bool add = false;
2520 tree t1, t2, t3, t4;
2522 /* Give up for -1. */
2523 if (d == -1)
2524 return NULL;
2526 /* Since d might be INT_MIN, we have to cast to
2527 unsigned HOST_WIDE_INT before negating to avoid
2528 undefined signed overflow. */
2529 abs_d = (d >= 0
2530 ? (unsigned HOST_WIDE_INT) d
2531 : - (unsigned HOST_WIDE_INT) d);
2533 /* n rem d = n rem -d */
2534 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2536 d = abs_d;
2537 oprnd1 = build_int_cst (itype, abs_d);
2539 else if (HOST_BITS_PER_WIDE_INT >= prec
2540 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2541 /* This case is not handled correctly below. */
2542 return NULL;
2544 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2545 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2547 add = true;
2548 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2550 if (post_shift >= prec)
2551 return NULL;
2553 /* t1 = oprnd0 h* ml; */
2554 t1 = vect_recog_temp_ssa_var (itype, NULL);
2555 def_stmt
2556 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2557 build_int_cst (itype, ml));
2559 if (add)
2561 /* t2 = t1 + oprnd0; */
2562 append_pattern_def_seq (stmt_vinfo, def_stmt);
2563 t2 = vect_recog_temp_ssa_var (itype, NULL);
2564 def_stmt
2565 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2567 else
2568 t2 = t1;
2570 if (post_shift)
2572 /* t3 = t2 >> post_shift; */
2573 append_pattern_def_seq (stmt_vinfo, def_stmt);
2574 t3 = vect_recog_temp_ssa_var (itype, NULL);
2575 def_stmt
2576 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2577 build_int_cst (itype, post_shift));
2579 else
2580 t3 = t2;
2582 wide_int oprnd0_min, oprnd0_max;
2583 int msb = 1;
2584 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2586 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2587 msb = 0;
2588 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2589 msb = -1;
2592 if (msb == 0 && d >= 0)
2594 /* q = t3; */
2595 q = t3;
2596 pattern_stmt = def_stmt;
2598 else
2600 /* t4 = oprnd0 >> (prec - 1);
2601 or if we know from VRP that oprnd0 >= 0
2602 t4 = 0;
2603 or if we know from VRP that oprnd0 < 0
2604 t4 = -1; */
2605 append_pattern_def_seq (stmt_vinfo, def_stmt);
2606 t4 = vect_recog_temp_ssa_var (itype, NULL);
2607 if (msb != 1)
2608 def_stmt
2609 = gimple_build_assign_with_ops (INTEGER_CST,
2610 t4, build_int_cst (itype, msb));
2611 else
2612 def_stmt
2613 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2614 build_int_cst (itype, prec - 1));
2615 append_pattern_def_seq (stmt_vinfo, def_stmt);
2617 /* q = t3 - t4; or q = t4 - t3; */
2618 q = vect_recog_temp_ssa_var (itype, NULL);
2619 pattern_stmt
2620 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2621 d < 0 ? t3 : t4);
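/* Worked example (illustrative assumption): for prec == 32 and d == 3,
   choose_multiplier returns ml == 0x55555556 with post_shift == 0 and
   add == false, so the sequence is

     t1 = x h* 0x55555556;
     t4 = x >> 31;
     q  = t1 - t4;

   e.g. x == -7 gives t1 == -3, t4 == -1 and q == -2, matching C's
   truncating -7 / 3 == -2.  */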
2625 if (rhs_code == TRUNC_MOD_EXPR)
2627 tree r, t1;
2629 /* We divided. Now finish by:
2630 t1 = q * oprnd1;
2631 r = oprnd0 - t1; */
2632 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2634 t1 = vect_recog_temp_ssa_var (itype, NULL);
2635 def_stmt
2636 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2637 append_pattern_def_seq (stmt_vinfo, def_stmt);
2639 r = vect_recog_temp_ssa_var (itype, NULL);
2640 pattern_stmt
2641 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2644 /* Pattern detected. */
2645 if (dump_enabled_p ())
2647 dump_printf_loc (MSG_NOTE, vect_location,
2648 "vect_recog_divmod_pattern: detected: ");
2649 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2650 dump_printf (MSG_NOTE, "\n");
2653 stmts->safe_push (last_stmt);
2655 *type_in = vectype;
2656 *type_out = vectype;
2657 return pattern_stmt;
2660 /* Function vect_recog_mixed_size_cond_pattern
2662 Try to find the following pattern:
2664 type x_t, y_t;
2665 TYPE a_T, b_T, c_T;
2666 loop:
2667 S1 a_T = x_t CMP y_t ? b_T : c_T;
2669 where type 'TYPE' is an integral type which has different size
2670 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2671 than 'type', the constants need to fit into an integer type
2672 with the same width as 'type') or results of conversion from 'type'.
2674 Input:
2676 * LAST_STMT: A stmt from which the pattern search begins.
2678 Output:
2680 * TYPE_IN: The type of the input arguments to the pattern.
2682 * TYPE_OUT: The type of the output of this pattern.
2684 * Return value: A new stmt that will be used to replace the pattern.
2685 Additionally a def_stmt is added.
2687 a_it = x_t CMP y_t ? b_it : c_it;
2688 a_T = (TYPE) a_it; */
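/* Illustrative example (not taken from the GCC sources): in

     void
     mixed_cond (const short *x, const short *y, int *a, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = x[i] < y[i] ? 3 : 5;
     }

   the comparison is on 16-bit elements while the result is 32-bit;
   the pattern first builds a 16-bit COND_EXPR (the constants 3 and 5
   fit the narrower type) and then widens its result, so the
   VEC_COND_EXPR can use the vector type of the comparison.  */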
2690 static gimple
2691 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2692 tree *type_out)
2694 gimple last_stmt = (*stmts)[0];
2695 tree cond_expr, then_clause, else_clause;
2696 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2697 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2698 machine_mode cmpmode;
2699 gimple pattern_stmt, def_stmt;
2700 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2701 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2702 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2703 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2704 bool promotion;
2705 tree comp_scalar_type;
2707 if (!is_gimple_assign (last_stmt)
2708 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2709 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2710 return NULL;
2712 cond_expr = gimple_assign_rhs1 (last_stmt);
2713 then_clause = gimple_assign_rhs2 (last_stmt);
2714 else_clause = gimple_assign_rhs3 (last_stmt);
2716 if (!COMPARISON_CLASS_P (cond_expr))
2717 return NULL;
2719 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2720 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2721 if (comp_vectype == NULL_TREE)
2722 return NULL;
2724 type = gimple_expr_type (last_stmt);
2725 if (types_compatible_p (type, comp_scalar_type)
2726 || ((TREE_CODE (then_clause) != INTEGER_CST
2727 || TREE_CODE (else_clause) != INTEGER_CST)
2728 && !INTEGRAL_TYPE_P (comp_scalar_type))
2729 || !INTEGRAL_TYPE_P (type))
2730 return NULL;
2732 if ((TREE_CODE (then_clause) != INTEGER_CST
2733 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2734 &def_stmt0, &promotion))
2735 || (TREE_CODE (else_clause) != INTEGER_CST
2736 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2737 &def_stmt1, &promotion)))
2738 return NULL;
2740 if (orig_type0 && orig_type1
2741 && !types_compatible_p (orig_type0, orig_type1))
2742 return NULL;
2744 if (orig_type0)
2746 if (!types_compatible_p (orig_type0, comp_scalar_type))
2747 return NULL;
2748 then_clause = gimple_assign_rhs1 (def_stmt0);
2749 itype = orig_type0;
2752 if (orig_type1)
2754 if (!types_compatible_p (orig_type1, comp_scalar_type))
2755 return NULL;
2756 else_clause = gimple_assign_rhs1 (def_stmt1);
2757 itype = orig_type1;
2760 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2762 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2763 return NULL;
2765 vectype = get_vectype_for_scalar_type (type);
2766 if (vectype == NULL_TREE)
2767 return NULL;
2769 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2770 return NULL;
2772 if (itype == NULL_TREE)
2773 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2774 TYPE_UNSIGNED (type));
2776 if (itype == NULL_TREE
2777 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2778 return NULL;
2780 vecitype = get_vectype_for_scalar_type (itype);
2781 if (vecitype == NULL_TREE)
2782 return NULL;
2784 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2785 return NULL;
2787 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2789 if ((TREE_CODE (then_clause) == INTEGER_CST
2790 && !int_fits_type_p (then_clause, itype))
2791 || (TREE_CODE (else_clause) == INTEGER_CST
2792 && !int_fits_type_p (else_clause, itype)))
2793 return NULL;
2796 def_stmt
2797 = gimple_build_assign_with_ops (COND_EXPR,
2798 vect_recog_temp_ssa_var (itype, NULL),
2799 unshare_expr (cond_expr),
2800 fold_convert (itype, then_clause),
2801 fold_convert (itype, else_clause));
2802 pattern_stmt
2803 = gimple_build_assign_with_ops (NOP_EXPR,
2804 vect_recog_temp_ssa_var (type, NULL),
2805 gimple_assign_lhs (def_stmt));
2807 new_pattern_def_seq (stmt_vinfo, def_stmt);
2808 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2809 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2810 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2811 *type_in = vecitype;
2812 *type_out = vectype;
2814 if (dump_enabled_p ())
2815 dump_printf_loc (MSG_NOTE, vect_location,
2816 "vect_recog_mixed_size_cond_pattern: detected:\n");
2818 return pattern_stmt;
2822 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
2823 true if bool VAR can be optimized that way. */
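/* For instance (illustrative only), given

     a_b = x_1 < y_2;
     b_b = z_3 != 0;
     c_b = a_b & ~b_b;

   the recursion walks c_b's definition through the BIT_AND_EXPR and
   BIT_NOT_EXPR down to the two comparisons, and answers true only if
   each comparison can be rewritten as a vectorizable COND_EXPR.  */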
2825 static bool
2826 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2828 gimple def_stmt;
2829 enum vect_def_type dt;
2830 tree def, rhs1;
2831 enum tree_code rhs_code;
2833 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2834 &dt))
2835 return false;
2837 if (dt != vect_internal_def)
2838 return false;
2840 if (!is_gimple_assign (def_stmt))
2841 return false;
2843 if (!has_single_use (def))
2844 return false;
2846 rhs1 = gimple_assign_rhs1 (def_stmt);
2847 rhs_code = gimple_assign_rhs_code (def_stmt);
2848 switch (rhs_code)
2850 case SSA_NAME:
2851 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2853 CASE_CONVERT:
2854 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2855 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2856 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2857 return false;
2858 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2860 case BIT_NOT_EXPR:
2861 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2863 case BIT_AND_EXPR:
2864 case BIT_IOR_EXPR:
2865 case BIT_XOR_EXPR:
2866 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2867 return false;
2868 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2869 bb_vinfo);
2871 default:
2872 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2874 tree vecitype, comp_vectype;
2876 /* If the comparison can throw, then is_gimple_condexpr will be
2877 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2878 if (stmt_could_throw_p (def_stmt))
2879 return false;
2881 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2882 if (comp_vectype == NULL_TREE)
2883 return false;
2885 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2887 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2888 tree itype
2889 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2890 vecitype = get_vectype_for_scalar_type (itype);
2891 if (vecitype == NULL_TREE)
2892 return false;
2894 else
2895 vecitype = comp_vectype;
2896 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2898 return false;
2903 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2904 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2905 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2907 static tree
2908 adjust_bool_pattern_cast (tree type, tree var)
2910 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2911 gimple cast_stmt, pattern_stmt;
2913 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2914 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2915 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2916 cast_stmt
2917 = gimple_build_assign_with_ops (NOP_EXPR,
2918 vect_recog_temp_ssa_var (type, NULL),
2919 gimple_assign_lhs (pattern_stmt));
2920 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2921 return gimple_assign_lhs (cast_stmt);
2925 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2926 recursively. VAR is an SSA_NAME that should be transformed from bool
2927 to a wider integer type, OUT_TYPE is the desired final integer type of
2928 the whole pattern, TRUEVAL should be NULL unless optimizing
2929 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2930 in the then_clause, STMTS is where statements with added pattern stmts
2931 should be pushed to. */
2933 static tree
2934 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2935 vec<gimple> *stmts)
2937 gimple stmt = SSA_NAME_DEF_STMT (var);
2938 enum tree_code rhs_code, def_rhs_code;
2939 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2940 location_t loc;
2941 gimple pattern_stmt, def_stmt;
2943 rhs1 = gimple_assign_rhs1 (stmt);
2944 rhs2 = gimple_assign_rhs2 (stmt);
2945 rhs_code = gimple_assign_rhs_code (stmt);
2946 loc = gimple_location (stmt);
2947 switch (rhs_code)
2949 case SSA_NAME:
2950 CASE_CONVERT:
2951 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2952 itype = TREE_TYPE (irhs1);
2953 pattern_stmt
2954 = gimple_build_assign_with_ops (SSA_NAME,
2955 vect_recog_temp_ssa_var (itype, NULL),
2956 irhs1);
2957 break;
2959 case BIT_NOT_EXPR:
2960 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2961 itype = TREE_TYPE (irhs1);
2962 pattern_stmt
2963 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2964 vect_recog_temp_ssa_var (itype, NULL),
2965 irhs1, build_int_cst (itype, 1));
2966 break;
2968 case BIT_AND_EXPR:
2969 /* Try to optimize x = y & (a < b ? 1 : 0); into
2970 x = (a < b ? y : 0);
2972 E.g. for:
2973 bool a_b, b_b, c_b;
2974 TYPE d_T;
2976 S1 a_b = x1 CMP1 y1;
2977 S2 b_b = x2 CMP2 y2;
2978 S3 c_b = a_b & b_b;
2979 S4 d_T = (TYPE) c_b;
2981 we would normally emit:
2983 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2984 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2985 S3' c_T = a_T & b_T;
2986 S4' d_T = c_T;
2988 but we can save one stmt by using the
2989 result of one of the COND_EXPRs in the other COND_EXPR and leave
2990 BIT_AND_EXPR stmt out:
2992 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2993 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2994 S4' d_T = c_T;
2996 At least when VEC_COND_EXPR is implemented using masks,
2997 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases the
2998 comparison mask is computed and then ANDed, in one case with an
2999 all-ones vector, in the other case with a vector register.
3000 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
3001 often more expensive. */
3002 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3003 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3004 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3006 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3007 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3008 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3009 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3011 gimple tstmt;
3012 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3013 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3014 tstmt = stmts->pop ();
3015 gcc_assert (tstmt == def_stmt);
3016 stmts->quick_push (stmt);
3017 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3018 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3019 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3020 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3021 return irhs2;
3023 else
3024 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3025 goto and_ior_xor;
3027 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3028 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3029 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3031 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3032 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3033 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3034 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3036 gimple tstmt;
3037 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3038 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3039 tstmt = stmts->pop ();
3040 gcc_assert (tstmt == def_stmt);
3041 stmts->quick_push (stmt);
3042 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3043 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3044 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3045 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3046 return irhs1;
3048 else
3049 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3050 goto and_ior_xor;
3052 /* FALLTHRU */
3053 case BIT_IOR_EXPR:
3054 case BIT_XOR_EXPR:
3055 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3056 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3057 and_ior_xor:
3058 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3059 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3061 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3062 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3063 int out_prec = TYPE_PRECISION (out_type);
3064 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3065 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3066 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3067 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3068 else
3070 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3071 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3074 itype = TREE_TYPE (irhs1);
3075 pattern_stmt
3076 = gimple_build_assign_with_ops (rhs_code,
3077 vect_recog_temp_ssa_var (itype, NULL),
3078 irhs1, irhs2);
3079 break;
3081 default:
3082 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3083 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3084 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3085 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3086 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3088 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3089 itype
3090 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3092 else
3093 itype = TREE_TYPE (rhs1);
3094 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3095 if (trueval == NULL_TREE)
3096 trueval = build_int_cst (itype, 1);
3097 else
3098 gcc_checking_assert (useless_type_conversion_p (itype,
3099 TREE_TYPE (trueval)));
3100 pattern_stmt
3101 = gimple_build_assign_with_ops (COND_EXPR,
3102 vect_recog_temp_ssa_var (itype, NULL),
3103 cond_expr, trueval,
3104 build_int_cst (itype, 0));
3105 break;
3108 stmts->safe_push (stmt);
3109 gimple_set_location (pattern_stmt, loc);
3110 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3111 return gimple_assign_lhs (pattern_stmt);
3115 /* Function vect_recog_bool_pattern
3117 Try to find pattern like following:
3119 bool a_b, b_b, c_b, d_b, e_b;
3120 TYPE f_T;
3121 loop:
3122 S1 a_b = x1 CMP1 y1;
3123 S2 b_b = x2 CMP2 y2;
3124 S3 c_b = a_b & b_b;
3125 S4 d_b = x3 CMP3 y3;
3126 S5 e_b = c_b | d_b;
3127 S6 f_T = (TYPE) e_b;
3129 where type 'TYPE' is an integral type. Or a similar pattern
3130 ending in
3132 S6 f_Y = e_b ? r_Y : s_Y;
3134 as results from if-conversion of a complex condition.
3136 Input:
3138 * LAST_STMT: A stmt at the end from which the pattern
3139 search begins, i.e. cast of a bool to
3140 an integer type.
3142 Output:
3144 * TYPE_IN: The type of the input arguments to the pattern.
3146 * TYPE_OUT: The type of the output of this pattern.
3148 * Return value: A new stmt that will be used to replace the pattern.
3150 Assuming the size of TYPE is the same as the size of all comparisons
3151 (otherwise some casts would be added where needed), for the above
3152 sequence we create related pattern stmts:
3153 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3154 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3155 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3156 S5' e_T = c_T | d_T;
3157 S6' f_T = e_T;
3159 Instead of the above S3' we could emit:
3160 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3161 S3' c_T = a_T | b_T;
3162 but the above is more efficient. */
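/* Illustrative example (not taken from the GCC sources): the pattern
   typically fires on code such as

     void
     bool_store (const int *x, const int *y, char *out, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         out[i] = (x[i] < 7) & (y[i] > 2);
     }

   where a combination of comparison results is converted to an
   integral type; each comparison becomes a 0/1 COND_EXPR as in the
   S1'..S6' sequence above.  */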
3164 static gimple
3165 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
3166 tree *type_out)
3168 gimple last_stmt = stmts->pop ();
3169 enum tree_code rhs_code;
3170 tree var, lhs, rhs, vectype;
3171 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3172 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3173 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
3174 gimple pattern_stmt;
3176 if (!is_gimple_assign (last_stmt))
3177 return NULL;
3179 var = gimple_assign_rhs1 (last_stmt);
3180 lhs = gimple_assign_lhs (last_stmt);
3182 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3183 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3184 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3185 return NULL;
3187 rhs_code = gimple_assign_rhs_code (last_stmt);
3188 if (CONVERT_EXPR_CODE_P (rhs_code))
3190 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3191 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3192 return NULL;
3193 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3194 if (vectype == NULL_TREE)
3195 return NULL;
3197 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3198 return NULL;
3200 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3201 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3202 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3203 pattern_stmt
3204 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs);
3205 else
3206 pattern_stmt
3207 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs);
3208 *type_out = vectype;
3209 *type_in = vectype;
3210 stmts->safe_push (last_stmt);
3211 if (dump_enabled_p ())
3212 dump_printf_loc (MSG_NOTE, vect_location,
3213 "vect_recog_bool_pattern: detected:\n");
3215 return pattern_stmt;
3217 else if (rhs_code == COND_EXPR
3218 && TREE_CODE (var) == SSA_NAME)
3220 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3221 if (vectype == NULL_TREE)
3222 return NULL;
3224 /* Build a scalar type for the boolean result that when
3225 vectorized matches the vector type of the result in
3226 size and number of elements. */
3227 unsigned prec
3228 = wi::udiv_trunc (TYPE_SIZE (vectype),
3229 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3230 tree type
3231 = build_nonstandard_integer_type (prec,
3232 TYPE_UNSIGNED (TREE_TYPE (var)));
3233 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3234 return NULL;
3236 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3237 return NULL;
3239 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3240 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3241 pattern_stmt
3242 = gimple_build_assign_with_ops (COND_EXPR, lhs,
3243 build2 (NE_EXPR, boolean_type_node,
3244 rhs, build_int_cst (type, 0)),
3245 gimple_assign_rhs2 (last_stmt),
3246 gimple_assign_rhs3 (last_stmt));
3247 *type_out = vectype;
3248 *type_in = vectype;
3249 stmts->safe_push (last_stmt);
3250 if (dump_enabled_p ())
3251 dump_printf_loc (MSG_NOTE, vect_location,
3252 "vect_recog_bool_pattern: detected:\n");
3254 return pattern_stmt;
3256 else if (rhs_code == SSA_NAME
3257 && STMT_VINFO_DATA_REF (stmt_vinfo))
3259 stmt_vec_info pattern_stmt_info;
3260 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3261 gcc_assert (vectype != NULL_TREE);
3262 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3263 return NULL;
3264 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3265 return NULL;
3267 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3268 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3269 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3271 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3272 gimple cast_stmt
3273 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs);
3274 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3275 rhs = rhs2;
3277 pattern_stmt
3278 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs);
3279 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3280 bb_vinfo);
3281 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3282 STMT_VINFO_DATA_REF (pattern_stmt_info)
3283 = STMT_VINFO_DATA_REF (stmt_vinfo);
3284 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3285 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3286 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3287 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3288 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3289 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3290 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3291 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3292 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3293 *type_out = vectype;
3294 *type_in = vectype;
3295 stmts->safe_push (last_stmt);
3296 if (dump_enabled_p ())
3297 dump_printf_loc (MSG_NOTE, vect_location,
3298 "vect_recog_bool_pattern: detected:\n");
3299 return pattern_stmt;
3301 else
3302 return NULL;
3306 /* Mark statements that are involved in a pattern. */
3308 static inline void
3309 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3310 tree pattern_vectype)
3312 stmt_vec_info pattern_stmt_info, def_stmt_info;
3313 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3314 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
3315 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
3316 gimple def_stmt;
3318 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3319 if (pattern_stmt_info == NULL)
3321 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3322 bb_vinfo);
3323 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3325 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3327 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3328 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3329 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3330 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3331 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3332 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3333 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3334 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3335 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3337 gimple_stmt_iterator si;
3338 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3339 !gsi_end_p (si); gsi_next (&si))
3341 def_stmt = gsi_stmt (si);
3342 def_stmt_info = vinfo_for_stmt (def_stmt);
3343 if (def_stmt_info == NULL)
3345 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3346 bb_vinfo);
3347 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3349 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3350 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3351 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3352 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3353 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3358 /* Function vect_pattern_recog_1
3360 Input:
3361 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3362 computation pattern.
3363 STMT: A stmt from which the pattern search should start.
3365 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3366 expression that computes the same functionality and can be used to
3367 replace the sequence of stmts that are involved in the pattern.
3369 Output:
3370 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3371 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3372 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3373 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3374 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
3375 fits the available target pattern.
3377 This function also does some bookkeeping, as explained in the documentation
3378 for vect_pattern_recog. */
3380 static void
3381 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3382 gimple_stmt_iterator si,
3383 vec<gimple> *stmts_to_replace)
3385 gimple stmt = gsi_stmt (si), pattern_stmt;
3386 stmt_vec_info stmt_info;
3387 loop_vec_info loop_vinfo;
3388 tree pattern_vectype;
3389 tree type_in, type_out;
3390 enum tree_code code;
3391 int i;
3392 gimple next;
3394 stmts_to_replace->truncate (0);
3395 stmts_to_replace->quick_push (stmt);
3396 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3397 if (!pattern_stmt)
3398 return;
3400 stmt = stmts_to_replace->last ();
3401 stmt_info = vinfo_for_stmt (stmt);
3402 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3404 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3406 /* No need to check target support (already checked by the pattern
3407 recognition function). */
3408 pattern_vectype = type_out ? type_out : type_in;
3410 else
3412 machine_mode vec_mode;
3413 enum insn_code icode;
3414 optab optab;
3416 /* Check target support */
3417 type_in = get_vectype_for_scalar_type (type_in);
3418 if (!type_in)
3419 return;
3420 if (type_out)
3421 type_out = get_vectype_for_scalar_type (type_out);
3422 else
3423 type_out = type_in;
3424 if (!type_out)
3425 return;
3426 pattern_vectype = type_out;
3428 if (is_gimple_assign (pattern_stmt))
3429 code = gimple_assign_rhs_code (pattern_stmt);
3430 else
3432 gcc_assert (is_gimple_call (pattern_stmt));
3433 code = CALL_EXPR;
3436 optab = optab_for_tree_code (code, type_in, optab_default);
3437 vec_mode = TYPE_MODE (type_in);
3438 if (!optab
3439 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3440 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3441 return;
3444 /* Found a vectorizable pattern. */
3445 if (dump_enabled_p ())
3447 dump_printf_loc (MSG_NOTE, vect_location,
3448 "pattern recognized: ");
3449 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3450 dump_printf (MSG_NOTE, "\n");
3453 /* Mark the stmts that are involved in the pattern. */
3454 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3456 /* Patterns cannot be vectorized using SLP, because they change the order of
3457 computation. */
3458 if (loop_vinfo)
3459 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3460 if (next == stmt)
3461 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3463 /* It is possible that additional pattern stmts are created and inserted in
3464 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3465 relevant statements. */
3466 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3467 && (unsigned) i < (stmts_to_replace->length () - 1);
3468 i++)
3470 stmt_info = vinfo_for_stmt (stmt);
3471 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3472 if (dump_enabled_p ())
3474 dump_printf_loc (MSG_NOTE, vect_location,
3475 "additional pattern stmt: ");
3476 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3477 dump_printf (MSG_NOTE, "\n");
3480 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3485 /* Function vect_pattern_recog
3487 Input:
3488 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3489 computation idioms.
3491 Output - for each computation idiom that is detected we create a new stmt
3492 that provides the same functionality and that can be vectorized. We
3493 also record some information in the struct_stmt_info of the relevant
3494 stmts, as explained below:
3496 At the entry to this function we have the following stmts, with the
3497 following initial value in the STMT_VINFO fields:
3499 stmt in_pattern_p related_stmt vec_stmt
3500 S1: a_i = .... - - -
3501 S2: a_2 = ..use(a_i).. - - -
3502 S3: a_1 = ..use(a_2).. - - -
3503 S4: a_0 = ..use(a_1).. - - -
3504 S5: ... = ..use(a_0).. - - -
3506 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3507 represented by a single stmt. We then:
3508 - create a new stmt S6 equivalent to the pattern (the stmt is not
3509 inserted into the code)
3510 - fill in the STMT_VINFO fields as follows:
3512 in_pattern_p related_stmt vec_stmt
3513 S1: a_i = .... - - -
3514 S2: a_2 = ..use(a_i).. - - -
3515 S3: a_1 = ..use(a_2).. - - -
3516 S4: a_0 = ..use(a_1).. true S6 -
3517 '---> S6: a_new = .... - S4 -
3518 S5: ... = ..use(a_0).. - - -
3520 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3521 to each other through the RELATED_STMT field).
3523 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3524 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3525 remain irrelevant unless used by stmts other than S4.
3527 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3528 (because they are marked as irrelevant). It will vectorize S6, and record
3529 a pointer to the new vector stmt VS6 from S6 (as usual).
3530 S4 will be skipped, and S5 will be vectorized as usual:
3532 in_pattern_p related_stmt vec_stmt
3533 S1: a_i = .... - - -
3534 S2: a_2 = ..use(a_i).. - - -
3535 S3: a_1 = ..use(a_2).. - - -
3536 > VS6: va_new = .... - - -
3537 S4: a_0 = ..use(a_1).. true S6 VS6
3538 '---> S6: a_new = .... - S4 VS6
3539 > VS5: ... = ..vuse(va_new).. - - -
3540 S5: ... = ..use(a_0).. - - -
3542 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3543 elsewhere), and we'll end up with:
3545 VS6: va_new = ....
3546 VS5: ... = ..vuse(va_new)..
3548 In case of more than one pattern statement, e.g., widen-mult with
3549 intermediate type:
3551 S1 a_t = ;
3552 S2 a_T = (TYPE) a_t;
3553 '--> S3: a_it = (interm_type) a_t;
3554 S4 prod_T = a_T * CONST;
3555 '--> S5: prod_T' = a_it w* CONST;
3557 there may be other users of a_T outside the pattern. In that case S2 will
3558 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3559 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3560 be recorded in S3. */
3562 void
3563 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3565 struct loop *loop;
3566 basic_block *bbs;
3567 unsigned int nbbs;
3568 gimple_stmt_iterator si;
3569 unsigned int i, j;
3570 vect_recog_func_ptr vect_recog_func;
3571 auto_vec<gimple, 1> stmts_to_replace;
3572 gimple stmt;
3574 if (dump_enabled_p ())
3575 dump_printf_loc (MSG_NOTE, vect_location,
3576 "=== vect_pattern_recog ===\n");
3578 if (loop_vinfo)
3580 loop = LOOP_VINFO_LOOP (loop_vinfo);
3581 bbs = LOOP_VINFO_BBS (loop_vinfo);
3582 nbbs = loop->num_nodes;
3584 else
3586 bbs = &BB_VINFO_BB (bb_vinfo);
3587 nbbs = 1;
3590 /* Scan through the loop stmts, applying the pattern recognition
3591 functions starting at each stmt visited: */
3592 for (i = 0; i < nbbs; i++)
3594 basic_block bb = bbs[i];
3595 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3597 if (bb_vinfo && (stmt = gsi_stmt (si))
3598 && vinfo_for_stmt (stmt)
3599 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3600 continue;
3602 /* Scan over all generic vect_recog_xxx_pattern functions. */
3603 for (j = 0; j < NUM_PATTERNS; j++)
3605 vect_recog_func = vect_vect_recog_func_ptrs[j];
3606 vect_pattern_recog_1 (vect_recog_func, si,
3607 &stmts_to_replace);