/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "cfgloop.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
#include "builtins.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
                                           tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
                                      tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
                                              tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
                                                      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
                                         tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
        vect_recog_widen_mult_pattern,
        vect_recog_widen_sum_pattern,
        vect_recog_dot_prod_pattern,
        vect_recog_sad_pattern,
        vect_recog_pow_pattern,
        vect_recog_widen_shift_pattern,
        vect_recog_over_widening_pattern,
        vect_recog_rotate_pattern,
        vect_recog_vector_vector_shift_pattern,
        vect_recog_divmod_pattern,
        vect_recog_mixed_size_cond_pattern,
        vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
        return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
          || gimple_code (stmt2) == GIMPLE_PHI)
        return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
                   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
                           &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
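/* As an illustration, a scalar reduction loop of the following shape matches
   the description above.  This is only a sketch: the function name and the
   choice of 8-bit inputs with a 32-bit accumulator are hypothetical, not part
   of the vectorizer.

     int
     dot_prod_example (signed char *x, signed char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x[i] * y[i];    // S3-S5, S7: widen, multiply, accumulate
       return sum;
     }
*/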
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
          && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
          = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
                                               oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
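/* As an illustration, a scalar loop of the following shape matches the
   description above.  This is only a sketch: the function name and the
   choice of 8-bit inputs with an int difference and accumulator are
   hypothetical, not part of the vectorizer.

     int
     sad_example (unsigned char *x, unsigned char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         {
           int diff = x[i] - y[i];           // S3-S5: widen, subtract
           sum += diff < 0 ? -diff : diff;   // S6, S8: abs, accumulate
         }
       return sum;
     }
*/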
static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          [DAD = (TYPE2) DAD;  #optional]
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
          || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign_with_ops
                          (SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
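/* For instance, with 8-bit a_t, 32-bit TYPE and CONST_OPRND 3, the
   transformation sketched above turns

     a_T = (TYPE) a_t;            ==>   a_it = (interm_type) a_t;
     res_T = a_T << 3;                  res_T = a_it w<< 3;

   where interm_type is the 16-bit intermediate type.  The concrete widths
   here are only an illustration of the restriction being checked.  */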
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               vec<gimple> *stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
               != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
                                               NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
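/* As an illustration, a scalar loop of the following shape matches the first
   variant above, since the multiplication of two shorts is performed in int
   by C's integer promotions.  This is only a sketch; the function name and
   the 16-bit/32-bit type choice are hypothetical.

     void
     widen_mult_example (short *a, short *b, int *w, int n)
     {
       for (int i = 0; i < n; i++)
         w[i] = a[i] * b[i];   // S3-S5: widen both operands, multiply
     }
*/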
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        {
          half_type1 = half_type0;
          oprnd1 = fold_convert (half_type1, oprnd1);
        }
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      tree* oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
        {
          def_stmt = def_stmt0;
          half_type0 = half_type1;
          oprnd = &oprnd0;
        }
      else
        {
          def_stmt = def_stmt1;
          half_type1 = half_type0;
          oprnd = &oprnd1;
        }

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                               old_oprnd, NULL_TREE);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
          || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
               TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vecitype, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
                                               oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt
        = gimple_build_assign_with_ops (NOP_EXPR,
                                        vect_recog_temp_ssa_var (type, NULL),
                                        gimple_assign_lhs (pattern_stmt),
                                        NULL_TREE);
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
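/* As an illustration, both calls below would be replaced as described above.
   This is only a sketch; the wrapper function names are hypothetical, while
   __builtin_pow is the GCC builtin the recognizer looks for.

     double
     square_example (double y)
     {
       return __builtin_pow (y, 2.0);   // becomes y * y
     }

     double
     sqrt_example (double y)
     {
       return __builtin_pow (y, 0.5);   // becomes sqrt (y)
     }
*/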
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
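/* As an illustration, a scalar reduction loop of the following shape matches
   the description above.  This is only a sketch; the function name and the
   16-bit element / 32-bit accumulator choice are hypothetical.

     int
     widen_sum_example (short *x, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x[i];   // S1-S3: load, widen, accumulate
       return sum;
     }
*/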
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
                                               oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
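/* As a worked example of the check below (widths illustrative only): given

     S2  x_T = (TYPE) x_t;
     S3  res_T = x_T >> 2;

   with 8-bit x_t and 32-bit TYPE, the shift can be done directly on the
   8-bit type, or on a 16-bit intermediate type if the target only supports
   the shift at that width.  */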
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
                              &promotion)
          || !promotion
          || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
           (e.g., if the original value was char, the shift amount is at most
           8 if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We
           store this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              new_oprnd = make_ssa_name (interm_type, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          new_oprnd = make_ssa_name (interm_type, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
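/* As an illustration, a loop of the following shape matches the description
   above: C's integer promotions perform the shift and the bitwise-or in int,
   and the store demotes the result back to 8 bits.  This is only a sketch;
   the function name and types are hypothetical.

     void
     over_widening_example (unsigned char *p, unsigned char *q, int n)
     {
       for (int i = 0; i < n; i++)
         q[i] = (p[i] << 1) | 1;   // S2-S5: ops over-promoted, then demoted
     }
*/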
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         a case we associate the last pattern statement with the last
         statement in the sequence.  Therefore, we only add the original
         statement to the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          new_oprnd = make_ssa_name (use_type, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
               = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with
    S4 in STMTS.  When an intermediate type is used and a pattern statement
    is created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
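/* As an illustration, a loop of the following shape matches the description
   above: the short element is promoted to int before the shift.  This is
   only a sketch; the function name and the 16-bit/32-bit type choice are
   hypothetical.

     void
     widen_shift_example (short *a, int *res, int n)
     {
       for (int i = 0; i < n; i++)
         res[i] = a[i] << 8;   // S2-S3: widen, then shift by a constant
     }
*/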
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
          && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
        {
          last_stmt = use_stmt;
          type = use_type;
        }
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

  where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
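/* As an illustration, a rotate open-coded in C as below may be folded by
   earlier passes into the single rotate S0 that this recognizer then expands
   back into shifts for targets without a vector rotate.  This is only a
   sketch; the function name is hypothetical and B = 32 here.

     unsigned int
     rotate_example (unsigned int b, unsigned int c)
     {
       return (b << c) | (b >> ((32 - c) & 31));   // b r<< c
     }
*/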
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
          && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
        return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
          || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
          || !optab2
          || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
        return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
        {
          basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
          if (bb == NULL
              || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
            ext_def = NULL;
        }
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (type))
        def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
                                               NULL_TREE);
      if (ext_def)
        {
          basic_block new_bb
            = gsi_insert_on_edge_immediate (ext_def, def_stmt);
          gcc_assert (!new_bb);
        }
      else
        append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
          || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
          || integer_zerop (def))
        return NULL;
      def2 = build_int_cst (stype,
                            GET_MODE_PRECISION (TYPE_MODE (type))
1970 - tree_to_uhwi (def));
1972 else
1974 tree vecstype = get_vectype_for_scalar_type (stype);
1975 stmt_vec_info def_stmt_vinfo;
1977 if (vecstype == NULL_TREE)
1978 return NULL;
1979 def2 = vect_recog_temp_ssa_var (stype, NULL);
1980 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
1981 NULL_TREE);
1982 if (ext_def)
1984 basic_block new_bb
1985 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1986 gcc_assert (!new_bb);
1988 else
1990 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1991 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1992 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1993 append_pattern_def_seq (stmt_vinfo, def_stmt);
1996 def2 = vect_recog_temp_ssa_var (stype, NULL);
1997 tree mask
1998 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1999 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
2000 gimple_assign_lhs (def_stmt),
2001 mask);
2002 if (ext_def)
2004 basic_block new_bb
2005 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2006 gcc_assert (!new_bb);
2008 else
2010 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2011 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2012 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2013 append_pattern_def_seq (stmt_vinfo, def_stmt);
2017 var1 = vect_recog_temp_ssa_var (type, NULL);
2018 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
2019 ? LSHIFT_EXPR : RSHIFT_EXPR,
2020 var1, oprnd0, def);
2021 append_pattern_def_seq (stmt_vinfo, def_stmt);
2023 var2 = vect_recog_temp_ssa_var (type, NULL);
2024 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
2025 ? RSHIFT_EXPR : LSHIFT_EXPR,
2026 var2, oprnd0, def2);
2027 append_pattern_def_seq (stmt_vinfo, def_stmt);
2029 /* Pattern detected. */
2030 if (dump_enabled_p ())
2031 dump_printf_loc (MSG_NOTE, vect_location,
2032 "vect_recog_rotate_pattern: detected:\n");
2034 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2035 var = vect_recog_temp_ssa_var (type, NULL);
2036 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
2038 if (dump_enabled_p ())
2039 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2041 stmts->safe_push (last_stmt);
2042 return pattern_stmt;
2045 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2046 vectorized:
2048 type a_t;
2049 TYPE b_T, res_T;
2051 S1 a_t = ;
2052 S2 b_T = ;
2053 S3 res_T = b_T op a_t;
2055 where type 'TYPE' is a type with different size than 'type',
2056 and op is <<, >> or rotate.
2058 Also detect cases:
2060 type a_t;
2061 TYPE b_T, c_T, res_T;
2063 S0 c_T = ;
2064 S1 a_t = (type) c_T;
2065 S2 b_T = ;
2066 S3 res_T = b_T op a_t;
2068 Input/Output:
2070 * STMTS: Contains a stmt from which the pattern search begins,
2071 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2072 with a shift/rotate that has the same type on both operands; in the
2073 second case just b_T op c_T, in the first case with an added cast
2074 of a_t to TYPE in STMT_VINFO_PATTERN_DEF_SEQ.
2076 Output:
2078 * TYPE_IN: The type of the input arguments to the pattern.
2080 * TYPE_OUT: The type of the output of this pattern.
2082 * Return value: A new stmt that will be used to replace the shift/rotate
2083 S3 stmt. */
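/* Illustrative example (assumed): with an 8-bit shift count applied to
   a 32-bit value,

     unsigned char a_t;
     unsigned int b_T, res_T;
     res_T = b_T >> a_t;

   the pattern emits a cast a_T = (unsigned int) a_t into the pattern
   def sequence and replaces S3 by res_T = b_T >> a_T, so both operands
   get the same vector type.  */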
2085 static gimple
2086 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
2087 tree *type_in, tree *type_out)
2089 gimple last_stmt = stmts->pop ();
2090 tree oprnd0, oprnd1, lhs, var;
2091 gimple pattern_stmt, def_stmt;
2092 enum tree_code rhs_code;
2093 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2094 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2095 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2096 enum vect_def_type dt;
2097 tree def;
2099 if (!is_gimple_assign (last_stmt))
2100 return NULL;
2102 rhs_code = gimple_assign_rhs_code (last_stmt);
2103 switch (rhs_code)
2105 case LSHIFT_EXPR:
2106 case RSHIFT_EXPR:
2107 case LROTATE_EXPR:
2108 case RROTATE_EXPR:
2109 break;
2110 default:
2111 return NULL;
2114 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2115 return NULL;
2117 lhs = gimple_assign_lhs (last_stmt);
2118 oprnd0 = gimple_assign_rhs1 (last_stmt);
2119 oprnd1 = gimple_assign_rhs2 (last_stmt);
2120 if (TREE_CODE (oprnd0) != SSA_NAME
2121 || TREE_CODE (oprnd1) != SSA_NAME
2122 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2123 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2124 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2125 || TYPE_PRECISION (TREE_TYPE (lhs))
2126 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2127 return NULL;
2129 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
2130 &def, &dt))
2131 return NULL;
2133 if (dt != vect_internal_def)
2134 return NULL;
2136 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2137 *type_out = *type_in;
2138 if (*type_in == NULL_TREE)
2139 return NULL;
2141 def = NULL_TREE;
2142 if (gimple_assign_cast_p (def_stmt))
2144 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2145 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2146 && TYPE_PRECISION (TREE_TYPE (rhs1))
2147 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2148 def = rhs1;
2151 if (def == NULL_TREE)
2153 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2154 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
2155 NULL_TREE);
2156 new_pattern_def_seq (stmt_vinfo, def_stmt);
2159 /* Pattern detected. */
2160 if (dump_enabled_p ())
2161 dump_printf_loc (MSG_NOTE, vect_location,
2162 "vect_recog_vector_vector_shift_pattern: detected:\n");
2164 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2165 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2166 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
2168 if (dump_enabled_p ())
2169 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2171 stmts->safe_push (last_stmt);
2172 return pattern_stmt;
2175 /* Detect a signed division by a constant that wouldn't otherwise
2176 be vectorized:
2178 type a_t, b_t;
2180 S1 a_t = b_t / N;
2182 where type 'type' is an integral type and N is a constant.
2184 Similarly handle modulo by a constant:
2186 S4 a_t = b_t % N;
2188 Input/Output:
2190 * STMTS: Contains a stmt from which the pattern search begins,
2191 i.e. the division stmt. S1 is replaced, if N is a power
2192 of two constant and type is signed, by:
2193 S3 y_t = b_t < 0 ? N - 1 : 0;
2194 S2 x_t = b_t + y_t;
2195 S1' a_t = x_t >> log2 (N);
2197 S4 is replaced, if N is a power of two constant and
2198 type is signed, by (where *_T temporaries have unsigned type):
2199 S9 y_T = b_t < 0 ? -1U : 0U;
2200 S8 z_T = y_T >> (sizeof (type) * CHAR_BIT - log2 (N));
2201 S7 z_t = (type) z_T;
2202 S6 w_t = b_t + z_t;
2203 S5 x_t = w_t & (N - 1);
2204 S4' a_t = x_t - z_t;
2206 Output:
2208 * TYPE_IN: The type of the input arguments to the pattern.
2210 * TYPE_OUT: The type of the output of this pattern.
2212 * Return value: A new stmt that will be used to replace the division
2213 S1 or modulo S4 stmt. */
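/* Worked example (illustrative, assuming a 32-bit signed type and
   N == 4): a_t = b_t / 4 becomes

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   For b_t == -5 this gives y_t == 3, x_t == -2 and a_t == -1, which
   matches the truncating division -5 / 4 == -1.  */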
2215 static gimple
2216 vect_recog_divmod_pattern (vec<gimple> *stmts,
2217 tree *type_in, tree *type_out)
2219 gimple last_stmt = stmts->pop ();
2220 tree oprnd0, oprnd1, vectype, itype, cond;
2221 gimple pattern_stmt, def_stmt;
2222 enum tree_code rhs_code;
2223 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2224 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2225 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2226 optab optab;
2227 tree q;
2228 int dummy_int, prec;
2229 stmt_vec_info def_stmt_vinfo;
2231 if (!is_gimple_assign (last_stmt))
2232 return NULL;
2234 rhs_code = gimple_assign_rhs_code (last_stmt);
2235 switch (rhs_code)
2237 case TRUNC_DIV_EXPR:
2238 case TRUNC_MOD_EXPR:
2239 break;
2240 default:
2241 return NULL;
2244 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2245 return NULL;
2247 oprnd0 = gimple_assign_rhs1 (last_stmt);
2248 oprnd1 = gimple_assign_rhs2 (last_stmt);
2249 itype = TREE_TYPE (oprnd0);
2250 if (TREE_CODE (oprnd0) != SSA_NAME
2251 || TREE_CODE (oprnd1) != INTEGER_CST
2252 || TREE_CODE (itype) != INTEGER_TYPE
2253 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2254 return NULL;
2256 vectype = get_vectype_for_scalar_type (itype);
2257 if (vectype == NULL_TREE)
2258 return NULL;
2260 /* If the target can handle vectorized division or modulo natively,
2261 don't attempt to optimize this. */
2262 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2263 if (optab != unknown_optab)
2265 machine_mode vec_mode = TYPE_MODE (vectype);
2266 int icode = (int) optab_handler (optab, vec_mode);
2267 if (icode != CODE_FOR_nothing)
2268 return NULL;
2271 prec = TYPE_PRECISION (itype);
2272 if (integer_pow2p (oprnd1))
2274 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2275 return NULL;
2277 /* Pattern detected. */
2278 if (dump_enabled_p ())
2279 dump_printf_loc (MSG_NOTE, vect_location,
2280 "vect_recog_divmod_pattern: detected:\n");
2282 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2283 build_int_cst (itype, 0));
2284 if (rhs_code == TRUNC_DIV_EXPR)
2286 tree var = vect_recog_temp_ssa_var (itype, NULL);
2287 tree shift;
2288 def_stmt
2289 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2290 fold_build2 (MINUS_EXPR, itype,
2291 oprnd1,
2292 build_int_cst (itype,
2293 1)),
2294 build_int_cst (itype, 0));
2295 new_pattern_def_seq (stmt_vinfo, def_stmt);
2296 var = vect_recog_temp_ssa_var (itype, NULL);
2297 def_stmt
2298 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
2299 gimple_assign_lhs (def_stmt));
2300 append_pattern_def_seq (stmt_vinfo, def_stmt);
2302 shift = build_int_cst (itype, tree_log2 (oprnd1));
2303 pattern_stmt
2304 = gimple_build_assign_with_ops (RSHIFT_EXPR,
2305 vect_recog_temp_ssa_var (itype,
2306 NULL),
2307 var, shift);
2309 else
2311 tree signmask;
2312 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2313 if (compare_tree_int (oprnd1, 2) == 0)
2315 signmask = vect_recog_temp_ssa_var (itype, NULL);
2316 def_stmt
2317 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
2318 build_int_cst (itype, 1),
2319 build_int_cst (itype, 0));
2320 append_pattern_def_seq (stmt_vinfo, def_stmt);
2322 else
2324 tree utype
2325 = build_nonstandard_integer_type (prec, 1);
2326 tree vecutype = get_vectype_for_scalar_type (utype);
2327 tree shift
2328 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2329 - tree_log2 (oprnd1));
2330 tree var = vect_recog_temp_ssa_var (utype, NULL);
2332 def_stmt
2333 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2334 build_int_cst (utype, -1),
2335 build_int_cst (utype, 0));
2336 def_stmt_vinfo
2337 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2338 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2339 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2340 append_pattern_def_seq (stmt_vinfo, def_stmt);
2341 var = vect_recog_temp_ssa_var (utype, NULL);
2342 def_stmt
2343 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2344 gimple_assign_lhs (def_stmt),
2345 shift);
2346 def_stmt_vinfo
2347 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2348 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2349 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2350 append_pattern_def_seq (stmt_vinfo, def_stmt);
2351 signmask = vect_recog_temp_ssa_var (itype, NULL);
2352 def_stmt
2353 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2354 NULL_TREE);
2355 append_pattern_def_seq (stmt_vinfo, def_stmt);
2357 def_stmt
2358 = gimple_build_assign_with_ops (PLUS_EXPR,
2359 vect_recog_temp_ssa_var (itype,
2360 NULL),
2361 oprnd0, signmask);
2362 append_pattern_def_seq (stmt_vinfo, def_stmt);
2363 def_stmt
2364 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2365 vect_recog_temp_ssa_var (itype,
2366 NULL),
2367 gimple_assign_lhs (def_stmt),
2368 fold_build2 (MINUS_EXPR, itype,
2369 oprnd1,
2370 build_int_cst (itype,
2371 1)));
2372 append_pattern_def_seq (stmt_vinfo, def_stmt);
2374 pattern_stmt
2375 = gimple_build_assign_with_ops (MINUS_EXPR,
2376 vect_recog_temp_ssa_var (itype,
2377 NULL),
2378 gimple_assign_lhs (def_stmt),
2379 signmask);
2382 if (dump_enabled_p ())
2383 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2386 stmts->safe_push (last_stmt);
2388 *type_in = vectype;
2389 *type_out = vectype;
2390 return pattern_stmt;
2393 if (prec > HOST_BITS_PER_WIDE_INT
2394 || integer_zerop (oprnd1))
2395 return NULL;
2397 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2398 return NULL;
2400 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2402 if (TYPE_UNSIGNED (itype))
2404 unsigned HOST_WIDE_INT mh, ml;
2405 int pre_shift, post_shift;
2406 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2407 & GET_MODE_MASK (TYPE_MODE (itype)));
2408 tree t1, t2, t3, t4;
2410 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2411 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2412 return NULL;
2414 /* Find a suitable multiplier and right shift count
2415 instead of multiplying with D. */
2416 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2418 /* If the suggested multiplier is more than SIZE bits, we can do better
2419 for even divisors, using an initial right shift. */
2420 if (mh != 0 && (d & 1) == 0)
2422 pre_shift = floor_log2 (d & -d);
2423 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2424 &ml, &post_shift, &dummy_int);
2425 gcc_assert (!mh);
2427 else
2428 pre_shift = 0;
2430 if (mh != 0)
2432 if (post_shift - 1 >= prec)
2433 return NULL;
2435 /* t1 = oprnd0 h* ml;
2436 t2 = oprnd0 - t1;
2437 t3 = t2 >> 1;
2438 t4 = t1 + t3;
2439 q = t4 >> (post_shift - 1); */
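/* Illustrative numbers (assumed, following the usual magic-number
   choice for this case): for 32-bit unsigned division by d == 7,
   ml == 0x24924925 and post_shift == 3; oprnd0 == 7 then gives
   t1 == 1, t2 == 6, t3 == 3, t4 == 4 and q == 4 >> 2 == 1.  */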
2440 t1 = vect_recog_temp_ssa_var (itype, NULL);
2441 def_stmt
2442 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2443 build_int_cst (itype, ml));
2444 append_pattern_def_seq (stmt_vinfo, def_stmt);
2446 t2 = vect_recog_temp_ssa_var (itype, NULL);
2447 def_stmt
2448 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
2449 append_pattern_def_seq (stmt_vinfo, def_stmt);
2451 t3 = vect_recog_temp_ssa_var (itype, NULL);
2452 def_stmt
2453 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2454 integer_one_node);
2455 append_pattern_def_seq (stmt_vinfo, def_stmt);
2457 t4 = vect_recog_temp_ssa_var (itype, NULL);
2458 def_stmt
2459 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
2461 if (post_shift != 1)
2463 append_pattern_def_seq (stmt_vinfo, def_stmt);
2465 q = vect_recog_temp_ssa_var (itype, NULL);
2466 pattern_stmt
2467 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
2468 build_int_cst (itype,
2469 post_shift
2470 - 1));
2472 else
2474 q = t4;
2475 pattern_stmt = def_stmt;
2478 else
2480 if (pre_shift >= prec || post_shift >= prec)
2481 return NULL;
2483 /* t1 = oprnd0 >> pre_shift;
2484 t2 = t1 h* ml;
2485 q = t2 >> post_shift; */
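/* Illustrative numbers (assumed): for 32-bit unsigned division by
   d == 14, pre_shift == 1 strips the factor of two and the odd part 7
   gets ml == 0x92492493 with post_shift == 2; oprnd0 == 28 then gives
   t1 == 14, t2 == 8 and q == 8 >> 2 == 2.  */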
2486 if (pre_shift)
2488 t1 = vect_recog_temp_ssa_var (itype, NULL);
2489 def_stmt
2490 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2491 build_int_cst (NULL,
2492 pre_shift));
2493 append_pattern_def_seq (stmt_vinfo, def_stmt);
2495 else
2496 t1 = oprnd0;
2498 t2 = vect_recog_temp_ssa_var (itype, NULL);
2499 def_stmt
2500 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
2501 build_int_cst (itype, ml));
2503 if (post_shift)
2505 append_pattern_def_seq (stmt_vinfo, def_stmt);
2507 q = vect_recog_temp_ssa_var (itype, NULL);
2508 def_stmt
2509 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2510 build_int_cst (itype,
2511 post_shift));
2513 else
2514 q = t2;
2516 pattern_stmt = def_stmt;
2519 else
2521 unsigned HOST_WIDE_INT ml;
2522 int post_shift;
2523 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2524 unsigned HOST_WIDE_INT abs_d;
2525 bool add = false;
2526 tree t1, t2, t3, t4;
2528 /* Give up for -1. */
2529 if (d == -1)
2530 return NULL;
2532 /* Since d might be INT_MIN, we have to cast to
2533 unsigned HOST_WIDE_INT before negating to avoid
2534 undefined signed overflow. */
2535 abs_d = (d >= 0
2536 ? (unsigned HOST_WIDE_INT) d
2537 : - (unsigned HOST_WIDE_INT) d);
2539 /* n rem d = n rem -d */
2540 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2542 d = abs_d;
2543 oprnd1 = build_int_cst (itype, abs_d);
2545 else if (HOST_BITS_PER_WIDE_INT >= prec
2546 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2547 /* This case is not handled correctly below. */
2548 return NULL;
2550 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2551 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2553 add = true;
2554 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2556 if (post_shift >= prec)
2557 return NULL;
2559 /* t1 = oprnd0 h* ml; */
2560 t1 = vect_recog_temp_ssa_var (itype, NULL);
2561 def_stmt
2562 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
2563 build_int_cst (itype, ml));
2565 if (add)
2567 /* t2 = t1 + oprnd0; */
2568 append_pattern_def_seq (stmt_vinfo, def_stmt);
2569 t2 = vect_recog_temp_ssa_var (itype, NULL);
2570 def_stmt
2571 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
2573 else
2574 t2 = t1;
2576 if (post_shift)
2578 /* t3 = t2 >> post_shift; */
2579 append_pattern_def_seq (stmt_vinfo, def_stmt);
2580 t3 = vect_recog_temp_ssa_var (itype, NULL);
2581 def_stmt
2582 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
2583 build_int_cst (itype, post_shift));
2585 else
2586 t3 = t2;
2588 wide_int oprnd0_min, oprnd0_max;
2589 int msb = 1;
2590 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2592 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2593 msb = 0;
2594 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2595 msb = -1;
2598 if (msb == 0 && d >= 0)
2600 /* q = t3; */
2601 q = t3;
2602 pattern_stmt = def_stmt;
2604 else
2606 /* t4 = oprnd0 >> (prec - 1);
2607 or if we know from VRP that oprnd0 >= 0
2608 t4 = 0;
2609 or if we know from VRP that oprnd0 < 0
2610 t4 = -1; */
2611 append_pattern_def_seq (stmt_vinfo, def_stmt);
2612 t4 = vect_recog_temp_ssa_var (itype, NULL);
2613 if (msb != 1)
2614 def_stmt
2615 = gimple_build_assign_with_ops (INTEGER_CST,
2616 t4, build_int_cst (itype, msb),
2617 NULL_TREE);
2618 else
2619 def_stmt
2620 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2621 build_int_cst (itype, prec - 1));
2622 append_pattern_def_seq (stmt_vinfo, def_stmt);
2624 /* q = t3 - t4; or q = t4 - t3; */
2625 q = vect_recog_temp_ssa_var (itype, NULL);
2626 pattern_stmt
2627 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2628 d < 0 ? t3 : t4);
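/* Illustrative numbers (assumed): for 32-bit signed division by d == 3,
   ml == 0x55555556 with post_shift == 0 and no add step; oprnd0 == -7
   gives t3 == t1 == -3 (signed highpart multiply), t4 == -1 from the
   sign of oprnd0, and q == t3 - t4 == -2, matching -7 / 3 == -2.  */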
2632 if (rhs_code == TRUNC_MOD_EXPR)
2634 tree r, t1;
2636 /* We divided. Now finish by:
2637 t1 = q * oprnd1;
2638 r = oprnd0 - t1; */
2639 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2641 t1 = vect_recog_temp_ssa_var (itype, NULL);
2642 def_stmt
2643 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
2644 append_pattern_def_seq (stmt_vinfo, def_stmt);
2646 r = vect_recog_temp_ssa_var (itype, NULL);
2647 pattern_stmt
2648 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
2651 /* Pattern detected. */
2652 if (dump_enabled_p ())
2654 dump_printf_loc (MSG_NOTE, vect_location,
2655 "vect_recog_divmod_pattern: detected: ");
2656 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2657 dump_printf (MSG_NOTE, "\n");
2660 stmts->safe_push (last_stmt);
2662 *type_in = vectype;
2663 *type_out = vectype;
2664 return pattern_stmt;
2667 /* Function vect_recog_mixed_size_cond_pattern
2669 Try to find the following pattern:
2671 type x_t, y_t;
2672 TYPE a_T, b_T, c_T;
2673 loop:
2674 S1 a_T = x_t CMP y_t ? b_T : c_T;
2676 where type 'TYPE' is an integral type which has different size
2677 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2678 than 'type', the constants need to fit into an integer type
2679 with the same width as 'type') or results of conversion from 'type'.
2681 Input:
2683 * LAST_STMT: A stmt from which the pattern search begins.
2685 Output:
2687 * TYPE_IN: The type of the input arguments to the pattern.
2689 * TYPE_OUT: The type of the output of this pattern.
2691 * Return value: A new stmt that will be used to replace the pattern.
2692 Additionally a def_stmt is added.
2694 a_it = x_t CMP y_t ? b_it : c_it;
2695 a_T = (TYPE) a_it; */
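/* Illustrative example (assumed): with 16-bit x_t, y_t and 32-bit a_T,

     a_T = x_t < y_t ? 17 : 42;

   is rewritten as

     a_it = x_t < y_t ? 17 : 42;
     a_T = (TYPE) a_it;

   where a_it has the 16-bit intermediate type, so the COND_EXPR
   compares and selects equally sized vector elements.  */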
2697 static gimple
2698 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2699 tree *type_out)
2701 gimple last_stmt = (*stmts)[0];
2702 tree cond_expr, then_clause, else_clause;
2703 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2704 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2705 machine_mode cmpmode;
2706 gimple pattern_stmt, def_stmt;
2707 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2708 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2709 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2710 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2711 bool promotion;
2712 tree comp_scalar_type;
2714 if (!is_gimple_assign (last_stmt)
2715 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2716 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2717 return NULL;
2719 cond_expr = gimple_assign_rhs1 (last_stmt);
2720 then_clause = gimple_assign_rhs2 (last_stmt);
2721 else_clause = gimple_assign_rhs3 (last_stmt);
2723 if (!COMPARISON_CLASS_P (cond_expr))
2724 return NULL;
2726 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2727 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2728 if (comp_vectype == NULL_TREE)
2729 return NULL;
2731 type = gimple_expr_type (last_stmt);
2732 if (types_compatible_p (type, comp_scalar_type)
2733 || ((TREE_CODE (then_clause) != INTEGER_CST
2734 || TREE_CODE (else_clause) != INTEGER_CST)
2735 && !INTEGRAL_TYPE_P (comp_scalar_type))
2736 || !INTEGRAL_TYPE_P (type))
2737 return NULL;
2739 if ((TREE_CODE (then_clause) != INTEGER_CST
2740 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2741 &def_stmt0, &promotion))
2742 || (TREE_CODE (else_clause) != INTEGER_CST
2743 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2744 &def_stmt1, &promotion)))
2745 return NULL;
2747 if (orig_type0 && orig_type1
2748 && !types_compatible_p (orig_type0, orig_type1))
2749 return NULL;
2751 if (orig_type0)
2753 if (!types_compatible_p (orig_type0, comp_scalar_type))
2754 return NULL;
2755 then_clause = gimple_assign_rhs1 (def_stmt0);
2756 itype = orig_type0;
2759 if (orig_type1)
2761 if (!types_compatible_p (orig_type1, comp_scalar_type))
2762 return NULL;
2763 else_clause = gimple_assign_rhs1 (def_stmt1);
2764 itype = orig_type1;
2767 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2769 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2770 return NULL;
2772 vectype = get_vectype_for_scalar_type (type);
2773 if (vectype == NULL_TREE)
2774 return NULL;
2776 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2777 return NULL;
2779 if (itype == NULL_TREE)
2780 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2781 TYPE_UNSIGNED (type));
2783 if (itype == NULL_TREE
2784 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2785 return NULL;
2787 vecitype = get_vectype_for_scalar_type (itype);
2788 if (vecitype == NULL_TREE)
2789 return NULL;
2791 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2792 return NULL;
2794 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2796 if ((TREE_CODE (then_clause) == INTEGER_CST
2797 && !int_fits_type_p (then_clause, itype))
2798 || (TREE_CODE (else_clause) == INTEGER_CST
2799 && !int_fits_type_p (else_clause, itype)))
2800 return NULL;
2803 def_stmt
2804 = gimple_build_assign_with_ops (COND_EXPR,
2805 vect_recog_temp_ssa_var (itype, NULL),
2806 unshare_expr (cond_expr),
2807 fold_convert (itype, then_clause),
2808 fold_convert (itype, else_clause));
2809 pattern_stmt
2810 = gimple_build_assign_with_ops (NOP_EXPR,
2811 vect_recog_temp_ssa_var (type, NULL),
2812 gimple_assign_lhs (def_stmt), NULL_TREE);
2814 new_pattern_def_seq (stmt_vinfo, def_stmt);
2815 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2816 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2817 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2818 *type_in = vecitype;
2819 *type_out = vectype;
2821 if (dump_enabled_p ())
2822 dump_printf_loc (MSG_NOTE, vect_location,
2823 "vect_recog_mixed_size_cond_pattern: detected:\n");
2825 return pattern_stmt;
2829 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2830 true if bool VAR can be optimized that way. */
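/* Illustrative example (assumed): a chain such as

     a_b = x1 < y1;
     b_b = x2 != y2;
     var = a_b & ~b_b;

   is accepted, because every leaf is a vectorizable comparison and
   every intermediate bool stmt has a single use.  */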
2832 static bool
2833 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2835 gimple def_stmt;
2836 enum vect_def_type dt;
2837 tree def, rhs1;
2838 enum tree_code rhs_code;
2840 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2841 &dt))
2842 return false;
2844 if (dt != vect_internal_def)
2845 return false;
2847 if (!is_gimple_assign (def_stmt))
2848 return false;
2850 if (!has_single_use (def))
2851 return false;
2853 rhs1 = gimple_assign_rhs1 (def_stmt);
2854 rhs_code = gimple_assign_rhs_code (def_stmt);
2855 switch (rhs_code)
2857 case SSA_NAME:
2858 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2860 CASE_CONVERT:
2861 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2862 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2863 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2864 return false;
2865 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2867 case BIT_NOT_EXPR:
2868 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2870 case BIT_AND_EXPR:
2871 case BIT_IOR_EXPR:
2872 case BIT_XOR_EXPR:
2873 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2874 return false;
2875 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2876 bb_vinfo);
2878 default:
2879 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2881 tree vecitype, comp_vectype;
2883 /* If the comparison can throw, then is_gimple_condexpr will be
2884 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2885 if (stmt_could_throw_p (def_stmt))
2886 return false;
2888 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2889 if (comp_vectype == NULL_TREE)
2890 return false;
2892 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2894 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2895 tree itype
2896 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2897 vecitype = get_vectype_for_scalar_type (itype);
2898 if (vecitype == NULL_TREE)
2899 return false;
2901 else
2902 vecitype = comp_vectype;
2903 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2905 return false;
2910 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2911 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2912 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2914 static tree
2915 adjust_bool_pattern_cast (tree type, tree var)
2917 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2918 gimple cast_stmt, pattern_stmt;
2920 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2921 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2922 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2923 cast_stmt
2924 = gimple_build_assign_with_ops (NOP_EXPR,
2925 vect_recog_temp_ssa_var (type, NULL),
2926 gimple_assign_lhs (pattern_stmt),
2927 NULL_TREE);
2928 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2929 return gimple_assign_lhs (cast_stmt);
2933 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2934 recursively. VAR is an SSA_NAME that should be transformed from bool
2935 to a wider integer type. OUT_TYPE is the desired final integer type of
2936 the whole pattern. TRUEVAL should be NULL unless optimizing
2937 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2938 in the then_clause. STMTS is where statements with added pattern stmts
2939 should be pushed to.
2941 static tree
2942 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2943 vec<gimple> *stmts)
2945 gimple stmt = SSA_NAME_DEF_STMT (var);
2946 enum tree_code rhs_code, def_rhs_code;
2947 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2948 location_t loc;
2949 gimple pattern_stmt, def_stmt;
2951 rhs1 = gimple_assign_rhs1 (stmt);
2952 rhs2 = gimple_assign_rhs2 (stmt);
2953 rhs_code = gimple_assign_rhs_code (stmt);
2954 loc = gimple_location (stmt);
2955 switch (rhs_code)
2957 case SSA_NAME:
2958 CASE_CONVERT:
2959 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2960 itype = TREE_TYPE (irhs1);
2961 pattern_stmt
2962 = gimple_build_assign_with_ops (SSA_NAME,
2963 vect_recog_temp_ssa_var (itype, NULL),
2964 irhs1, NULL_TREE);
2965 break;
2967 case BIT_NOT_EXPR:
2968 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2969 itype = TREE_TYPE (irhs1);
2970 pattern_stmt
2971 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2972 vect_recog_temp_ssa_var (itype, NULL),
2973 irhs1, build_int_cst (itype, 1));
2974 break;
2976 case BIT_AND_EXPR:
2977 /* Try to optimize x = y & (a < b ? 1 : 0); into
2978 x = (a < b ? y : 0);
2980 E.g. for:
2981 bool a_b, b_b, c_b;
2982 TYPE d_T;
2984 S1 a_b = x1 CMP1 y1;
2985 S2 b_b = x2 CMP2 y2;
2986 S3 c_b = a_b & b_b;
2987 S4 d_T = (TYPE) c_b;
2989 we would normally emit:
2991 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2992 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2993 S3' c_T = a_T & b_T;
2994 S4' d_T = c_T;
2996 but we can save one stmt by using the
2997 result of one of the COND_EXPRs in the other COND_EXPR and leaving
2998 the BIT_AND_EXPR stmt out:
3000 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3001 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3002 S4' d_T = c_T;
3004 At least when VEC_COND_EXPR is implemented using masks,
3005 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3006 computes the comparison mask and ANDs it, in one case with an
3007 all-ones vector, in the other case with a vector register.
3008 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3009 often more expensive. */
3010 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3011 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3012 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3014 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3015 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3016 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3017 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3019 gimple tstmt;
3020 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3021 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3022 tstmt = stmts->pop ();
3023 gcc_assert (tstmt == def_stmt);
3024 stmts->quick_push (stmt);
3025 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3026 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3027 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3028 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3029 return irhs2;
3031 else
3032 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3033 goto and_ior_xor;
3035 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3036 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3037 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3039 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3040 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3041 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3042 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3044 gimple tstmt;
3045 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3046 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3047 tstmt = stmts->pop ();
3048 gcc_assert (tstmt == def_stmt);
3049 stmts->quick_push (stmt);
3050 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3051 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3052 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3053 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3054 return irhs1;
3056 else
3057 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3058 goto and_ior_xor;
3060 /* FALLTHRU */
3061 case BIT_IOR_EXPR:
3062 case BIT_XOR_EXPR:
3063 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3064 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3065 and_ior_xor:
3066 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3067 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3069 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3070 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3071 int out_prec = TYPE_PRECISION (out_type);
3072 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3073 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3074 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3075 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3076 else
3078 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3079 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3082 itype = TREE_TYPE (irhs1);
3083 pattern_stmt
3084 = gimple_build_assign_with_ops (rhs_code,
3085 vect_recog_temp_ssa_var (itype, NULL),
3086 irhs1, irhs2);
3087 break;
3089 default:
3090 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3091 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3092 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3093 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3094 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3096 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3097 itype
3098 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3100 else
3101 itype = TREE_TYPE (rhs1);
3102 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3103 if (trueval == NULL_TREE)
3104 trueval = build_int_cst (itype, 1);
3105 else
3106 gcc_checking_assert (useless_type_conversion_p (itype,
3107 TREE_TYPE (trueval)));
3108 pattern_stmt
3109 = gimple_build_assign_with_ops (COND_EXPR,
3110 vect_recog_temp_ssa_var (itype, NULL),
3111 cond_expr, trueval,
3112 build_int_cst (itype, 0));
3113 break;
3116 stmts->safe_push (stmt);
3117 gimple_set_location (pattern_stmt, loc);
3118 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3119 return gimple_assign_lhs (pattern_stmt);
3123 /* Function vect_recog_bool_pattern
3125 Try to find pattern like following:
3127 bool a_b, b_b, c_b, d_b, e_b;
3128 TYPE f_T;
3129 loop:
3130 S1 a_b = x1 CMP1 y1;
3131 S2 b_b = x2 CMP2 y2;
3132 S3 c_b = a_b & b_b;
3133 S4 d_b = x3 CMP3 y3;
3134 S5 e_b = c_b | d_b;
3135 S6 f_T = (TYPE) e_b;
3137 where type 'TYPE' is an integral type. Or a similar pattern
3138 ending in
3140 S6 f_Y = e_b ? r_Y : s_Y;
3142 as results from if-conversion of a complex condition.
3144 Input:
3146 * LAST_STMT: A stmt at the end from which the pattern
3147 search begins, i.e. cast of a bool to
3148 an integer type.
3150 Output:
3152 * TYPE_IN: The type of the input arguments to the pattern.
3154 * TYPE_OUT: The type of the output of this pattern.
3156 * Return value: A new stmt that will be used to replace the pattern.
3158 Assuming size of TYPE is the same as size of all comparisons
3159 (otherwise some casts would be added where needed), for the above
3160 sequence we create the following related pattern stmts:
3161 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3162 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3163 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3164 S5' e_T = c_T | d_T;
3165 S6' f_T = e_T;
3167 Instead of the above S3' we could emit:
3168 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3169 S3' c_T = a_T | b_T;
3170 but the above is more efficient. */
3172 static gimple
3173 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
3174 tree *type_out)
3176 gimple last_stmt = stmts->pop ();
3177 enum tree_code rhs_code;
3178 tree var, lhs, rhs, vectype;
3179 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3180 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3181 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
3182 gimple pattern_stmt;
3184 if (!is_gimple_assign (last_stmt))
3185 return NULL;
3187 var = gimple_assign_rhs1 (last_stmt);
3188 lhs = gimple_assign_lhs (last_stmt);
3190 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3191 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3192 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3193 return NULL;
3195 rhs_code = gimple_assign_rhs_code (last_stmt);
3196 if (CONVERT_EXPR_CODE_P (rhs_code))
3198 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3199 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3200 return NULL;
3201 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3202 if (vectype == NULL_TREE)
3203 return NULL;
3205 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3206 return NULL;
3208 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3209 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3210 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3211 pattern_stmt
3212 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
3213 else
3214 pattern_stmt
3215 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
3216 *type_out = vectype;
3217 *type_in = vectype;
3218 stmts->safe_push (last_stmt);
3219 if (dump_enabled_p ())
3220 dump_printf_loc (MSG_NOTE, vect_location,
3221 "vect_recog_bool_pattern: detected:\n");
3223 return pattern_stmt;
3225 else if (rhs_code == COND_EXPR
3226 && TREE_CODE (var) == SSA_NAME)
3228 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3229 if (vectype == NULL_TREE)
3230 return NULL;
3232 /* Build a scalar type for the boolean result that when
3233 vectorized matches the vector type of the result in
3234 size and number of elements. */
3235 unsigned prec
3236 = wi::udiv_trunc (TYPE_SIZE (vectype),
3237 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3238 tree type
3239 = build_nonstandard_integer_type (prec,
3240 TYPE_UNSIGNED (TREE_TYPE (var)));
3241 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3242 return NULL;
3244 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3245 return NULL;
3247 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3248 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3249 pattern_stmt
3250 = gimple_build_assign_with_ops (COND_EXPR, lhs,
3251 build2 (NE_EXPR, boolean_type_node,
3252 rhs, build_int_cst (type, 0)),
3253 gimple_assign_rhs2 (last_stmt),
3254 gimple_assign_rhs3 (last_stmt));
3255 *type_out = vectype;
3256 *type_in = vectype;
3257 stmts->safe_push (last_stmt);
3258 if (dump_enabled_p ())
3259 dump_printf_loc (MSG_NOTE, vect_location,
3260 "vect_recog_bool_pattern: detected:\n");
3262 return pattern_stmt;
3264 else if (rhs_code == SSA_NAME
3265 && STMT_VINFO_DATA_REF (stmt_vinfo))
3267 stmt_vec_info pattern_stmt_info;
3268 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3269 gcc_assert (vectype != NULL_TREE);
3270 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3271 return NULL;
3272 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3273 return NULL;
3275 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3276 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3277 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3279 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3280 gimple cast_stmt
3281 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
3282 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3283 rhs = rhs2;
3285 pattern_stmt
3286 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
3287 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3288 bb_vinfo);
3289 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3290 STMT_VINFO_DATA_REF (pattern_stmt_info)
3291 = STMT_VINFO_DATA_REF (stmt_vinfo);
3292 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3293 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3294 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3295 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3296 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3297 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3298 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3299 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3300 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3301 *type_out = vectype;
3302 *type_in = vectype;
3303 stmts->safe_push (last_stmt);
3304 if (dump_enabled_p ())
3305 dump_printf_loc (MSG_NOTE, vect_location,
3306 "vect_recog_bool_pattern: detected:\n");
3307 return pattern_stmt;
3309 else
3310 return NULL;
3314 /* Mark statements that are involved in a pattern. */
3316 static inline void
3317 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3318 tree pattern_vectype)
3320 stmt_vec_info pattern_stmt_info, def_stmt_info;
3321 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3322 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
3323 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
3324 gimple def_stmt;
3326 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3327 if (pattern_stmt_info == NULL)
3329 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3330 bb_vinfo);
3331 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3333 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3335 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3336 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3337 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3338 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3339 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3340 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3341 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3342 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3343 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3345 gimple_stmt_iterator si;
3346 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3347 !gsi_end_p (si); gsi_next (&si))
3349 def_stmt = gsi_stmt (si);
3350 def_stmt_info = vinfo_for_stmt (def_stmt);
3351 if (def_stmt_info == NULL)
3353 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3354 bb_vinfo);
3355 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3357 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3358 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3359 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3360 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3361 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3366 /* Function vect_pattern_recog_1
3368 Input:
3369 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3370 computation pattern.
3371 STMT: A stmt from which the pattern search should start.
3373 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3374 expression that computes the same functionality and can be used to
3375 replace the sequence of stmts that are involved in the pattern.
3377 Output:
3378 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3379 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3380 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3381 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3382 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3383 the available target pattern.
3385 This function also does some bookkeeping, as explained in the documentation
3386 for vect_pattern_recog. */
3388 static void
3389 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3390 gimple_stmt_iterator si,
3391 vec<gimple> *stmts_to_replace)
3393 gimple stmt = gsi_stmt (si), pattern_stmt;
3394 stmt_vec_info stmt_info;
3395 loop_vec_info loop_vinfo;
3396 tree pattern_vectype;
3397 tree type_in, type_out;
3398 enum tree_code code;
3399 int i;
3400 gimple next;
3402 stmts_to_replace->truncate (0);
3403 stmts_to_replace->quick_push (stmt);
3404 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3405 if (!pattern_stmt)
3406 return;
3408 stmt = stmts_to_replace->last ();
3409 stmt_info = vinfo_for_stmt (stmt);
3410 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3412 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3414 /* No need to check target support (already checked by the pattern
3415 recognition function). */
3416 pattern_vectype = type_out ? type_out : type_in;
3418 else
3420 machine_mode vec_mode;
3421 enum insn_code icode;
3422 optab optab;
3424 /* Check target support */
3425 type_in = get_vectype_for_scalar_type (type_in);
3426 if (!type_in)
3427 return;
3428 if (type_out)
3429 type_out = get_vectype_for_scalar_type (type_out);
3430 else
3431 type_out = type_in;
3432 if (!type_out)
3433 return;
3434 pattern_vectype = type_out;
3436 if (is_gimple_assign (pattern_stmt))
3437 code = gimple_assign_rhs_code (pattern_stmt);
3438 else
3440 gcc_assert (is_gimple_call (pattern_stmt));
3441 code = CALL_EXPR;
3444 optab = optab_for_tree_code (code, type_in, optab_default);
3445 vec_mode = TYPE_MODE (type_in);
3446 if (!optab
3447 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3448 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3449 return;
3452 /* Found a vectorizable pattern. */
3453 if (dump_enabled_p ())
3455 dump_printf_loc (MSG_NOTE, vect_location,
3456 "pattern recognized: ");
3457 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3458 dump_printf (MSG_NOTE, "\n");
3461 /* Mark the stmts that are involved in the pattern. */
3462 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3464 /* Patterns cannot be vectorized using SLP, because they change the order of
3465 computation. */
3466 if (loop_vinfo)
3467 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3468 if (next == stmt)
3469 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3471 /* It is possible that additional pattern stmts are created and inserted in
3472 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3473 relevant statements. */
3474 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3475 && (unsigned) i < (stmts_to_replace->length () - 1);
3476 i++)
3478 stmt_info = vinfo_for_stmt (stmt);
3479 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3480 if (dump_enabled_p ())
3482 dump_printf_loc (MSG_NOTE, vect_location,
3483 "additional pattern stmt: ");
3484 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3485 dump_printf (MSG_NOTE, "\n");
3488 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3493 /* Function vect_pattern_recog
3495 Input:
3496 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3497 computation idioms.
3499 Output - for each computation idiom that is detected we create a new stmt
3500 that provides the same functionality and that can be vectorized. We
3501 also record some information in the struct_stmt_info of the relevant
3502 stmts, as explained below:
3504 At the entry to this function we have the following stmts, with the
3505 following initial value in the STMT_VINFO fields:
3507 stmt in_pattern_p related_stmt vec_stmt
3508 S1: a_i = .... - - -
3509 S2: a_2 = ..use(a_i).. - - -
3510 S3: a_1 = ..use(a_2).. - - -
3511 S4: a_0 = ..use(a_1).. - - -
3512 S5: ... = ..use(a_0).. - - -
3514 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3515 represented by a single stmt. We then:
3516 - create a new stmt S6 equivalent to the pattern (the stmt is not
3517 inserted into the code)
3518 - fill in the STMT_VINFO fields as follows:
3520 in_pattern_p related_stmt vec_stmt
3521 S1: a_i = .... - - -
3522 S2: a_2 = ..use(a_i).. - - -
3523 S3: a_1 = ..use(a_2).. - - -
3524 S4: a_0 = ..use(a_1).. true S6 -
3525 '---> S6: a_new = .... - S4 -
3526 S5: ... = ..use(a_0).. - - -
3528 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3529 to each other through the RELATED_STMT field).
3531 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3532 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3533 remain irrelevant unless used by stmts other than S4.
3535 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3536 (because they are marked as irrelevant). It will vectorize S6, and record
3537 a pointer to the new vector stmt VS6 from S6 (as usual).
3538 S4 will be skipped, and S5 will be vectorized as usual:
3540 in_pattern_p related_stmt vec_stmt
3541 S1: a_i = .... - - -
3542 S2: a_2 = ..use(a_i).. - - -
3543 S3: a_1 = ..use(a_2).. - - -
3544 > VS6: va_new = .... - - -
3545 S4: a_0 = ..use(a_1).. true S6 VS6
3546 '---> S6: a_new = .... - S4 VS6
3547 > VS5: ... = ..vuse(va_new).. - - -
3548 S5: ... = ..use(a_0).. - - -
3550 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3551 elsewhere), and we'll end up with:
3553 VS6: va_new = ....
3554 VS5: ... = ..vuse(va_new)..
3556 In case of more than one pattern statements, e.g., widen-mult with
3557 intermediate type:
3559 S1 a_t = ;
3560 S2 a_T = (TYPE) a_t;
3561 '--> S3: a_it = (interm_type) a_t;
3562 S4 prod_T = a_T * CONST;
3563 '--> S5: prod_T' = a_it w* CONST;
3565 there may be other users of a_T outside the pattern. In that case S2 will
3566 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3567 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3568 be recorded in S3. */
3570 void
3571 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3573 struct loop *loop;
3574 basic_block *bbs;
3575 unsigned int nbbs;
3576 gimple_stmt_iterator si;
3577 unsigned int i, j;
3578 vect_recog_func_ptr vect_recog_func;
3579 auto_vec<gimple, 1> stmts_to_replace;
3580 gimple stmt;
3582 if (dump_enabled_p ())
3583 dump_printf_loc (MSG_NOTE, vect_location,
3584 "=== vect_pattern_recog ===\n");
3586 if (loop_vinfo)
3588 loop = LOOP_VINFO_LOOP (loop_vinfo);
3589 bbs = LOOP_VINFO_BBS (loop_vinfo);
3590 nbbs = loop->num_nodes;
3592 else
3594 bbs = &BB_VINFO_BB (bb_vinfo);
3595 nbbs = 1;
3598 /* Scan through the loop stmts, applying the pattern recognition
3599 functions starting at each stmt visited: */
3600 for (i = 0; i < nbbs; i++)
3602 basic_block bb = bbs[i];
3603 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3605 if (bb_vinfo && (stmt = gsi_stmt (si))
3606 && vinfo_for_stmt (stmt)
3607 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3608 continue;
3610 /* Scan over all generic vect_recog_xxx_pattern functions. */
3611 for (j = 0; j < NUM_PATTERNS; j++)
3613 vect_recog_func = vect_vect_recog_func_ptrs[j];
3614 vect_pattern_recog_1 (vect_recog_func, si,
3615 &stmts_to_replace);