/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2014 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "cfgloop.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "params.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "dumpfile.h"
#include "builtins.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
                                           tree *);
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
                                      tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
                                              tree *, tree *);
static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
                                                      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
                                         tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
        vect_recog_widen_mult_pattern,
        vect_recog_widen_sum_pattern,
        vect_recog_dot_prod_pattern,
        vect_recog_sad_pattern,
        vect_recog_pow_pattern,
        vect_recog_widen_shift_pattern,
        vect_recog_over_widening_pattern,
        vect_recog_rotate_pattern,
        vect_recog_vector_vector_shift_pattern,
        vect_recog_divmod_pattern,
        vect_recog_mixed_size_cond_pattern,
        vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);

  if (!gimple_bb (stmt2))
    return false;

  if (loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
        return false;
    }
  else
    {
      if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
          || gimple_code (stmt2) == GIMPLE_PHI)
        return false;
    }

  gcc_assert (vinfo_for_stmt (stmt2));
  return true;
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple
vect_single_imm_use (gimple def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
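
/* For instance (an illustrative sketch, not part of the original sources),
   given

     char c_t;
     int c_T;
     S: c_T = (int) c_t;

   calling this function for c_T with USE_STMT being some user of c_T sets
   *ORIG_TYPE to char, *DEF_STMT to S, and *PROMOTION to true, since int
   is at least twice as wide as char.  */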
static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
                   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
                           &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
                           bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
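
/* A minimal illustrative sketch (not part of the original sources): a
   source loop this pattern is meant to catch, assuming signed char
   elements, a short intermediate product and an int accumulator:

     int
     dot_prod (signed char *x, signed char *y, int n)
     {
       short prod;
       int sum = 0;
       for (int i = 0; i < n; i++)
         {
           prod = x[i] * y[i];     <-- S3,S4,S5: promote and multiply
           sum += prod;            <-- S6,S7: widen and accumulate
         }
       return sum;
     }

   On a target with a dot-product instruction the loop body becomes a
   single DOT_PROD_EXPR per vector iteration.  */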
static gimple
vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
                             tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
                             &promotion)
          && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
          = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
                              &promotion)
          || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
                                      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
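
/* A minimal illustrative sketch (not part of the original sources),
   assuming unsigned char inputs and an int accumulator:

     int
     sad (unsigned char *x, unsigned char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         {
           int diff = x[i] - y[i];           <-- S3,S4,S5
           sum += diff < 0 ? -diff : diff;   <-- S6,S8
         }
       return sum;
     }

   On a target with a SAD instruction (e.g. x86 psadbw) the loop body
   becomes a single SAD_EXPR per vector iteration.  */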
static gimple
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size of DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple def_stmt;

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
          || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
                                             sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
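
/* For example (an illustrative sketch, not part of the original sources),
   given HALF_TYPE == char and TYPE == int in

     a_T = (TYPE) a_t;
     res_T = a_T << 10;

   TYPE is 4 times bigger than HALF_TYPE, so no widening operation from
   char is possible; but the shift amount 10 does not exceed the precision
   of the intermediate type short, so the conversion is rewritten as
   a_it = (short) a_t and the shift becomes a short -> int widening
   operation.  */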
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               vec<gimple> *stmts, tree type,
                               tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
              != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
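
/* A minimal illustrative sketch (not part of the original sources),
   assuming short operands multiplied into an int result:

     void
     widen_mult (short *a, short *b, int *c, int n)
     {
       for (int i = 0; i < n; i++)
         c[i] = (int) a[i] * (int) b[i];   <-- S3,S4,S5
     }

   Each multiplication is replaced by WIDEN_MULT_EXPR <a_t, b_t>, so the
   operands need not be unpacked to int first.  */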
static gimple
vect_recog_widen_mult_pattern (vec<gimple> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple new_stmt = NULL, pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                            &oprnd0, stmts, type,
                                            &half_type0, def_stmt0))
        {
          half_type1 = half_type0;
          oprnd1 = fold_convert (half_type1, oprnd1);
        }
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      tree* oprnd = NULL;
      gimple def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
        {
          def_stmt = def_stmt0;
          half_type0 = half_type1;
          oprnd = &oprnd0;
        }
      else
        {
          def_stmt = def_stmt1;
          half_type1 = half_type0;
          oprnd = &oprnd1;
        }

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
          || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
               TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vecitype, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
                                          NOP_EXPR,
                                          gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
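
/* A minimal illustrative sketch (not part of the original sources):

     #include <math.h>

     void
     square_all (double *x, int n)
     {
       for (int i = 0; i < n; i++)
         x[i] = pow (x[i], 2.0);   <-- rewritten as x[i] * x[i]
     }

   A call pow (x[i], 0.5) would instead be rewritten as sqrt (x[i]),
   provided the target can vectorize sqrt.  */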
static gimple
vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
                        tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)
        return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
        {
          gcall *stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              != NULL_TREE)
            {
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
              return stmt;
            }
        }
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
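
/* A minimal illustrative sketch (not part of the original sources),
   assuming unsigned short elements summed into an unsigned int
   accumulator:

     unsigned int
     widen_sum (unsigned short *x, int n)
     {
       unsigned int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x[i];              <-- S2,S3: promote and accumulate
       return sum;
     }
  */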
static gimple
vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
                              tree *type_out)
{
  gimple stmt, last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
                          &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
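
/* For example (an illustrative sketch, not part of the original sources),
   with TYPE == int and HALF_TYPE == unsigned char in

     S2  x_T = (int) x_t;
     S3  res_T = x_T & 0xf0;

   the constant 0xf0 fits unsigned char, so S3 can be performed on
   unsigned char directly: *OP0 becomes the pre-promotion operand x_t and
   *OP1 the constant folded to unsigned char.  */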
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  vec<gimple> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
                              &promotion)
          || !promotion
          || !vect_same_loop_or_bb_p (stmt, def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
           (e.g., if the original value was char, the shift amount is at most 8
           if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              stmts->safe_push (def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              new_oprnd = make_ssa_name (interm_type);
              new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              stmts->safe_push (def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          new_oprnd = make_ssa_name (interm_type);
          new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1
   are constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can
   either be 'type' or some intermediate type.  For now, we expect S5 to be
   a type demotion operation.  We also check that S3 and S4 have only one
   use.  */
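
/* A minimal illustrative sketch (not part of the original sources): in

     void
     over_widen (unsigned char *p, unsigned short *q, int n)
     {
       for (int i = 0; i < n; i++)
         q[i] = ((int) p[i] << 1) | 3;   <-- S2-S5: widen, shift, ior, demote
     }

   both operations can be carried out on a 16-bit intermediate type
   instead of int, so the vectorizer never needs to unpack the elements
   all the way to int.  */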
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          new_oprnd = make_ssa_name (use_type);
          pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            stmts->safe_push (prev_stmt);
        }
      else
        {
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
               = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced
    with S4 in STMTS.  When an intermediate type is used and a pattern
    statement is created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
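
/* A minimal illustrative sketch (not part of the original sources),
   assuming short operands shifted into int results:

     void
     widen_shift (short *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
         b[i] = (int) a[i] << 8;   <-- S2,S3: promote, then shift
     }

   Each shift is replaced by WIDEN_LSHIFT_EXPR <a_t, 8> on targets that
   provide widening shifts (e.g. the NEON vshll instructions).  */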
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
                                tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
          && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
        {
          last_stmt = use_stmt;
          type = use_type;
        }
    }

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      &oprnd0, stmts,
                                      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy_code, &dummy_code,
                                          &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

  where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
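
/* A minimal illustrative sketch (not part of the original sources): a
   rotate written in plain C, such as

     void
     rotate_all (unsigned int *x, int n, int c)
     {
       for (int i = 0; i < n; i++)
         x[i] = (x[i] << c) | (x[i] >> (32 - c));
     }

   may already have been folded into x[i] r<< c earlier in compilation;
   if the target lacks a vector rotate, this pattern expands the rotate
   back into the S1-S4 shift/ior sequence so the loop can still be
   vectorized.  */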
static gimple
vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (bb_vinfo != NULL || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
          && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (bb_vinfo == NULL && dt == vect_internal_def)
        return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
          || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
          || !optab2
          || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
        return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && loop_vinfo)
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
        {
          basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
          if (bb == NULL
              || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
            ext_def = NULL;
        }
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (type))
        def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
        {
          basic_block new_bb
            = gsi_insert_on_edge_immediate (ext_def, def_stmt);
          gcc_assert (!new_bb);
        }
      else
        append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
          || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
          || integer_zerop (def))
        return NULL;
      def2 = build_int_cst (stype,
                            GET_MODE_PRECISION (TYPE_MODE (type))
                            - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
        return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
        {
          basic_block new_bb
            = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1974 gcc_assert (!new_bb);
1976 else
1978 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1979 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1980 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1981 append_pattern_def_seq (stmt_vinfo, def_stmt);
1984 def2 = vect_recog_temp_ssa_var (stype, NULL);
1985 tree mask
1986 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1987 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1988 gimple_assign_lhs (def_stmt), mask);
1989 if (ext_def)
1991 basic_block new_bb
1992 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1993 gcc_assert (!new_bb);
1995 else
1997 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1998 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1999 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2000 append_pattern_def_seq (stmt_vinfo, def_stmt);
2004 var1 = vect_recog_temp_ssa_var (type, NULL);
2005 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2006 ? LSHIFT_EXPR : RSHIFT_EXPR,
2007 oprnd0, def);
2008 append_pattern_def_seq (stmt_vinfo, def_stmt);
2010 var2 = vect_recog_temp_ssa_var (type, NULL);
2011 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2012 ? RSHIFT_EXPR : LSHIFT_EXPR,
2013 oprnd0, def2);
2014 append_pattern_def_seq (stmt_vinfo, def_stmt);
2016 /* Pattern detected. */
2017 if (dump_enabled_p ())
2018 dump_printf_loc (MSG_NOTE, vect_location,
2019 "vect_recog_rotate_pattern: detected:\n");
2021 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2022 var = vect_recog_temp_ssa_var (type, NULL);
2023 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2025 if (dump_enabled_p ())
2026 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2028 stmts->safe_push (last_stmt);
2029 return pattern_stmt;
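/* Illustrative sketch, not part of GCC: the scalar equivalent of the
   S1-S4/S0 expansion built above, for a left rotate of a 32-bit
   unsigned value with 0 <= c_t < 32.  Masking the negated count with
   B - 1 keeps the right-shift count in range even for c_t == 0.  */

static inline unsigned int
rotl32_example (unsigned int b_t, unsigned int c_t)
{
  unsigned int d_t = -c_t;		/* S1 */
  unsigned int e_t = d_t & 31;		/* S2: B - 1, with B == 32 */
  unsigned int f_t = b_t << c_t;	/* S3 */
  unsigned int g_t = b_t >> e_t;	/* S4 */
  return f_t | g_t;			/* S0 */
}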
2032 /* Detect a vector by vector shift pattern that wouldn't otherwise be
2033 vectorized:
2035 type a_t;
2036 TYPE b_T, res_T;
2038 S1 a_t = ;
2039 S2 b_T = ;
2040 S3 res_T = b_T op a_t;
2042 where type 'TYPE' is a type with a different size than 'type',
2043 and op is <<, >> or rotate.
2045 Also detect cases:
2047 type a_t;
2048 TYPE b_T, c_T, res_T;
2050 S0 c_T = ;
2051 S1 a_t = (type) c_T;
2052 S2 b_T = ;
2053 S3 res_T = b_T op a_t;
2055 Input/Output:
2057 * STMTS: Contains a stmt from which the pattern search begins,
2058 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2059 with a shift/rotate which has same type on both operands, in the
2060 second case just b_T op c_T, in the first case with added cast
2061 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2063 Output:
2065 * TYPE_IN: The type of the input arguments to the pattern.
2067 * TYPE_OUT: The type of the output of this pattern.
2069 * Return value: A new stmt that will be used to replace the shift/rotate
2070 S3 stmt. */
2072 static gimple
2073 vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
2074 tree *type_in, tree *type_out)
2076 gimple last_stmt = stmts->pop ();
2077 tree oprnd0, oprnd1, lhs, var;
2078 gimple pattern_stmt, def_stmt;
2079 enum tree_code rhs_code;
2080 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2081 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2082 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2083 enum vect_def_type dt;
2084 tree def;
2086 if (!is_gimple_assign (last_stmt))
2087 return NULL;
2089 rhs_code = gimple_assign_rhs_code (last_stmt);
2090 switch (rhs_code)
2092 case LSHIFT_EXPR:
2093 case RSHIFT_EXPR:
2094 case LROTATE_EXPR:
2095 case RROTATE_EXPR:
2096 break;
2097 default:
2098 return NULL;
2101 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2102 return NULL;
2104 lhs = gimple_assign_lhs (last_stmt);
2105 oprnd0 = gimple_assign_rhs1 (last_stmt);
2106 oprnd1 = gimple_assign_rhs2 (last_stmt);
2107 if (TREE_CODE (oprnd0) != SSA_NAME
2108 || TREE_CODE (oprnd1) != SSA_NAME
2109 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2110 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2111 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2112 || TYPE_PRECISION (TREE_TYPE (lhs))
2113 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2114 return NULL;
2116 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
2117 &def, &dt))
2118 return NULL;
2120 if (dt != vect_internal_def)
2121 return NULL;
2123 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2124 *type_out = *type_in;
2125 if (*type_in == NULL_TREE)
2126 return NULL;
2128 def = NULL_TREE;
2129 if (gimple_assign_cast_p (def_stmt))
2131 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2132 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2133 && TYPE_PRECISION (TREE_TYPE (rhs1))
2134 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2135 def = rhs1;
2138 if (def == NULL_TREE)
2140 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2141 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2142 new_pattern_def_seq (stmt_vinfo, def_stmt);
2145 /* Pattern detected. */
2146 if (dump_enabled_p ())
2147 dump_printf_loc (MSG_NOTE, vect_location,
2148 "vect_recog_vector_vector_shift_pattern: detected:\n");
2150 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2151 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2152 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2154 if (dump_enabled_p ())
2155 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2157 stmts->safe_push (last_stmt);
2158 return pattern_stmt;
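/* Illustrative sketch, not part of GCC: a source shape that produces
   the mixed-size shift matched above, assuming 32-bit int and 64-bit
   long long.  The shift count has a different size than the shifted
   value; the pattern adds a cast so both vector operands agree.  */

static inline long long
vector_vector_shift_example (long long b_T, int a_t)
{
  return b_T << a_t;	/* S3: b_T op a_t, with differing sizes */
}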
2161 /* Detect a signed division by a constant that wouldn't be
2162 otherwise vectorized:
2164 type a_t, b_t;
2166 S1 a_t = b_t / N;
2168 where type 'type' is an integral type and N is a constant.
2170 Similarly handle modulo by a constant:
2172 S4 a_t = b_t % N;
2174 Input/Output:
2176 * STMTS: Contains a stmt from which the pattern search begins,
2177 i.e. the division stmt.  If N is a power of two constant and the
2178 type is signed, S1 is replaced by:
2179 S3 y_t = b_t < 0 ? N - 1 : 0;
2180 S2 x_t = b_t + y_t;
2181 S1' a_t = x_t >> log2 (N);
2183 Similarly, if N is a power of two constant and the type is
2184 signed, S4 is replaced by (where *_T temporaries have unsigned type):
2185 S9 y_T = b_t < 0 ? -1U : 0U;
2186 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2187 S7 z_t = (type) z_T;
2188 S6 w_t = b_t + z_t;
2189 S5 x_t = w_t & (N - 1);
2190 S4' a_t = x_t - z_t;
2192 Output:
2194 * TYPE_IN: The type of the input arguments to the pattern.
2196 * TYPE_OUT: The type of the output of this pattern.
2198 * Return value: A new stmt that will be used to replace the division
2199 S1 or modulo S4 stmt. */
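/* Illustrative sketch, not part of GCC: the branchless scalar form of
   the S3/S2/S1' expansion above, for N == 8 and 32-bit int, assuming
   an arithmetic right shift for signed values (as GCC provides).
   Adding N - 1 to negative dividends makes the shift round towards
   zero, matching C's truncating division.  */

static inline int
sdiv_by_8_example (int b_t)
{
  int y_t = b_t < 0 ? 8 - 1 : 0;	/* S3 */
  int x_t = b_t + y_t;			/* S2 */
  return x_t >> 3;			/* S1': log2 (8) == 3 */
}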
2201 static gimple
2202 vect_recog_divmod_pattern (vec<gimple> *stmts,
2203 tree *type_in, tree *type_out)
2205 gimple last_stmt = stmts->pop ();
2206 tree oprnd0, oprnd1, vectype, itype, cond;
2207 gimple pattern_stmt, def_stmt;
2208 enum tree_code rhs_code;
2209 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2210 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2211 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2212 optab optab;
2213 tree q;
2214 int dummy_int, prec;
2215 stmt_vec_info def_stmt_vinfo;
2217 if (!is_gimple_assign (last_stmt))
2218 return NULL;
2220 rhs_code = gimple_assign_rhs_code (last_stmt);
2221 switch (rhs_code)
2223 case TRUNC_DIV_EXPR:
2224 case TRUNC_MOD_EXPR:
2225 break;
2226 default:
2227 return NULL;
2230 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2231 return NULL;
2233 oprnd0 = gimple_assign_rhs1 (last_stmt);
2234 oprnd1 = gimple_assign_rhs2 (last_stmt);
2235 itype = TREE_TYPE (oprnd0);
2236 if (TREE_CODE (oprnd0) != SSA_NAME
2237 || TREE_CODE (oprnd1) != INTEGER_CST
2238 || TREE_CODE (itype) != INTEGER_TYPE
2239 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2240 return NULL;
2242 vectype = get_vectype_for_scalar_type (itype);
2243 if (vectype == NULL_TREE)
2244 return NULL;
2246 /* If the target can handle vectorized division or modulo natively,
2247 don't attempt to optimize this. */
2248 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2249 if (optab != unknown_optab)
2251 machine_mode vec_mode = TYPE_MODE (vectype);
2252 int icode = (int) optab_handler (optab, vec_mode);
2253 if (icode != CODE_FOR_nothing)
2254 return NULL;
2257 prec = TYPE_PRECISION (itype);
2258 if (integer_pow2p (oprnd1))
2260 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2261 return NULL;
2263 /* Pattern detected. */
2264 if (dump_enabled_p ())
2265 dump_printf_loc (MSG_NOTE, vect_location,
2266 "vect_recog_divmod_pattern: detected:\n");
2268 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2269 build_int_cst (itype, 0));
2270 if (rhs_code == TRUNC_DIV_EXPR)
2272 tree var = vect_recog_temp_ssa_var (itype, NULL);
2273 tree shift;
2274 def_stmt
2275 = gimple_build_assign (var, COND_EXPR, cond,
2276 fold_build2 (MINUS_EXPR, itype, oprnd1,
2277 build_int_cst (itype, 1)),
2278 build_int_cst (itype, 0));
2279 new_pattern_def_seq (stmt_vinfo, def_stmt);
2280 var = vect_recog_temp_ssa_var (itype, NULL);
2281 def_stmt
2282 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2283 gimple_assign_lhs (def_stmt));
2284 append_pattern_def_seq (stmt_vinfo, def_stmt);
2286 shift = build_int_cst (itype, tree_log2 (oprnd1));
2287 pattern_stmt
2288 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2289 RSHIFT_EXPR, var, shift);
2291 else
2293 tree signmask;
2294 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2295 if (compare_tree_int (oprnd1, 2) == 0)
2297 signmask = vect_recog_temp_ssa_var (itype, NULL);
2298 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2299 build_int_cst (itype, 1),
2300 build_int_cst (itype, 0));
2301 append_pattern_def_seq (stmt_vinfo, def_stmt);
2303 else
2305 tree utype
2306 = build_nonstandard_integer_type (prec, 1);
2307 tree vecutype = get_vectype_for_scalar_type (utype);
2308 tree shift
2309 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2310 - tree_log2 (oprnd1));
2311 tree var = vect_recog_temp_ssa_var (utype, NULL);
2313 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2314 build_int_cst (utype, -1),
2315 build_int_cst (utype, 0));
2316 def_stmt_vinfo
2317 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2318 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2319 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2320 append_pattern_def_seq (stmt_vinfo, def_stmt);
2321 var = vect_recog_temp_ssa_var (utype, NULL);
2322 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2323 gimple_assign_lhs (def_stmt),
2324 shift);
2325 def_stmt_vinfo
2326 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2327 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2328 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2329 append_pattern_def_seq (stmt_vinfo, def_stmt);
2330 signmask = vect_recog_temp_ssa_var (itype, NULL);
2331 def_stmt
2332 = gimple_build_assign (signmask, NOP_EXPR, var);
2333 append_pattern_def_seq (stmt_vinfo, def_stmt);
2335 def_stmt
2336 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2337 PLUS_EXPR, oprnd0, signmask);
2338 append_pattern_def_seq (stmt_vinfo, def_stmt);
2339 def_stmt
2340 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2341 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2342 fold_build2 (MINUS_EXPR, itype, oprnd1,
2343 build_int_cst (itype, 1)));
2344 append_pattern_def_seq (stmt_vinfo, def_stmt);
2346 pattern_stmt
2347 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2348 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2349 signmask);
2352 if (dump_enabled_p ())
2353 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2354 0);
2356 stmts->safe_push (last_stmt);
2358 *type_in = vectype;
2359 *type_out = vectype;
2360 return pattern_stmt;
2363 if (prec > HOST_BITS_PER_WIDE_INT
2364 || integer_zerop (oprnd1))
2365 return NULL;
2367 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2368 return NULL;
2370 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2372 if (TYPE_UNSIGNED (itype))
2374 unsigned HOST_WIDE_INT mh, ml;
2375 int pre_shift, post_shift;
2376 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2377 & GET_MODE_MASK (TYPE_MODE (itype)));
2378 tree t1, t2, t3, t4;
2380 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2381 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2382 return NULL;
2384 /* Find a suitable multiplier and right shift count
2385 instead of multiplying with D. */
2386 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2388 /* If the suggested multiplier is more than SIZE bits, we can do better
2389 for even divisors, using an initial right shift. */
2390 if (mh != 0 && (d & 1) == 0)
2392 pre_shift = floor_log2 (d & -d);
2393 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2394 &ml, &post_shift, &dummy_int);
2395 gcc_assert (!mh);
2397 else
2398 pre_shift = 0;
2400 if (mh != 0)
2402 if (post_shift - 1 >= prec)
2403 return NULL;
2405 /* t1 = oprnd0 h* ml;
2406 t2 = oprnd0 - t1;
2407 t3 = t2 >> 1;
2408 t4 = t1 + t3;
2409 q = t4 >> (post_shift - 1); */
2410 t1 = vect_recog_temp_ssa_var (itype, NULL);
2411 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2412 build_int_cst (itype, ml));
2413 append_pattern_def_seq (stmt_vinfo, def_stmt);
2415 t2 = vect_recog_temp_ssa_var (itype, NULL);
2416 def_stmt
2417 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2418 append_pattern_def_seq (stmt_vinfo, def_stmt);
2420 t3 = vect_recog_temp_ssa_var (itype, NULL);
2421 def_stmt
2422 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2423 append_pattern_def_seq (stmt_vinfo, def_stmt);
2425 t4 = vect_recog_temp_ssa_var (itype, NULL);
2426 def_stmt
2427 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2429 if (post_shift != 1)
2431 append_pattern_def_seq (stmt_vinfo, def_stmt);
2433 q = vect_recog_temp_ssa_var (itype, NULL);
2434 pattern_stmt
2435 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2436 build_int_cst (itype, post_shift - 1));
2438 else
2440 q = t4;
2441 pattern_stmt = def_stmt;
2444 else
2446 if (pre_shift >= prec || post_shift >= prec)
2447 return NULL;
2449 /* t1 = oprnd0 >> pre_shift;
2450 t2 = t1 h* ml;
2451 q = t2 >> post_shift; */
2452 if (pre_shift)
2454 t1 = vect_recog_temp_ssa_var (itype, NULL);
2455 def_stmt
2456 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2457 build_int_cst (NULL, pre_shift));
2458 append_pattern_def_seq (stmt_vinfo, def_stmt);
2460 else
2461 t1 = oprnd0;
2463 t2 = vect_recog_temp_ssa_var (itype, NULL);
2464 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2465 build_int_cst (itype, ml));
2467 if (post_shift)
2469 append_pattern_def_seq (stmt_vinfo, def_stmt);
2471 q = vect_recog_temp_ssa_var (itype, NULL);
2472 def_stmt
2473 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2474 build_int_cst (itype, post_shift));
2476 else
2477 q = t2;
2479 pattern_stmt = def_stmt;
2482 else
2484 unsigned HOST_WIDE_INT ml;
2485 int post_shift;
2486 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2487 unsigned HOST_WIDE_INT abs_d;
2488 bool add = false;
2489 tree t1, t2, t3, t4;
2491 /* Give up for -1. */
2492 if (d == -1)
2493 return NULL;
2495 /* Since d might be INT_MIN, we have to cast to
2496 unsigned HOST_WIDE_INT before negating to avoid
2497 undefined signed overflow. */
2498 abs_d = (d >= 0
2499 ? (unsigned HOST_WIDE_INT) d
2500 : - (unsigned HOST_WIDE_INT) d);
2502 /* n rem d = n rem -d */
2503 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2505 d = abs_d;
2506 oprnd1 = build_int_cst (itype, abs_d);
2508 else if (HOST_BITS_PER_WIDE_INT >= prec
2509 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2510 /* This case is not handled correctly below. */
2511 return NULL;
2513 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2514 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2516 add = true;
2517 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2519 if (post_shift >= prec)
2520 return NULL;
2522 /* t1 = oprnd0 h* ml; */
2523 t1 = vect_recog_temp_ssa_var (itype, NULL);
2524 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2525 build_int_cst (itype, ml));
2527 if (add)
2529 /* t2 = t1 + oprnd0; */
2530 append_pattern_def_seq (stmt_vinfo, def_stmt);
2531 t2 = vect_recog_temp_ssa_var (itype, NULL);
2532 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2534 else
2535 t2 = t1;
2537 if (post_shift)
2539 /* t3 = t2 >> post_shift; */
2540 append_pattern_def_seq (stmt_vinfo, def_stmt);
2541 t3 = vect_recog_temp_ssa_var (itype, NULL);
2542 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2543 build_int_cst (itype, post_shift));
2545 else
2546 t3 = t2;
2548 wide_int oprnd0_min, oprnd0_max;
2549 int msb = 1;
2550 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2552 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2553 msb = 0;
2554 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2555 msb = -1;
2558 if (msb == 0 && d >= 0)
2560 /* q = t3; */
2561 q = t3;
2562 pattern_stmt = def_stmt;
2564 else
2566 /* t4 = oprnd0 >> (prec - 1);
2567 or if we know from VRP that oprnd0 >= 0
2568 t4 = 0;
2569 or if we know from VRP that oprnd0 < 0
2570 t4 = -1; */
2571 append_pattern_def_seq (stmt_vinfo, def_stmt);
2572 t4 = vect_recog_temp_ssa_var (itype, NULL);
2573 if (msb != 1)
2574 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2575 build_int_cst (itype, msb));
2576 else
2577 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2578 build_int_cst (itype, prec - 1));
2579 append_pattern_def_seq (stmt_vinfo, def_stmt);
2581 /* q = t3 - t4; or q = t4 - t3; */
2582 q = vect_recog_temp_ssa_var (itype, NULL);
2583 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2584 d < 0 ? t3 : t4);
2588 if (rhs_code == TRUNC_MOD_EXPR)
2590 tree r, t1;
2592 /* We divided. Now finish by:
2593 t1 = q * oprnd1;
2594 r = oprnd0 - t1; */
2595 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2597 t1 = vect_recog_temp_ssa_var (itype, NULL);
2598 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2599 append_pattern_def_seq (stmt_vinfo, def_stmt);
2601 r = vect_recog_temp_ssa_var (itype, NULL);
2602 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2605 /* Pattern detected. */
2606 if (dump_enabled_p ())
2608 dump_printf_loc (MSG_NOTE, vect_location,
2609 "vect_recog_divmod_pattern: detected: ");
2610 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2611 dump_printf (MSG_NOTE, "\n");
2614 stmts->safe_push (last_stmt);
2616 *type_in = vectype;
2617 *type_out = vectype;
2618 return pattern_stmt;
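/* Illustrative sketch, not part of GCC: the multiply-highpart sequence
   the code above emits, written out for unsigned 32-bit division by 3.
   The multiplier 0xAAAAAAAB and post_shift of 1 are the values
   choose_multiplier yields for d == 3, prec == 32; they are stated
   here as an assumption, not derived.  */

static inline unsigned int
udiv_by_3_example (unsigned int n)
{
  unsigned long long prod = (unsigned long long) n * 0xAAAAAAABULL;
  unsigned int t2 = (unsigned int) (prod >> 32);	/* t2 = n h* ml */
  return t2 >> 1;					/* q = t2 >> post_shift */
}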
2621 /* Function vect_recog_mixed_size_cond_pattern
2623 Try to find the following pattern:
2625 type x_t, y_t;
2626 TYPE a_T, b_T, c_T;
2627 loop:
2628 S1 a_T = x_t CMP y_t ? b_T : c_T;
2630 where type 'TYPE' is an integral type which has a different size
2631 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2632 than 'type', the constants need to fit into an integer type
2633 with the same width as 'type') or results of conversion from 'type'.
2635 Input:
2637 * LAST_STMT: A stmt from which the pattern search begins.
2639 Output:
2641 * TYPE_IN: The type of the input arguments to the pattern.
2643 * TYPE_OUT: The type of the output of this pattern.
2645 * Return value: A new stmt that will be used to replace the pattern.
2646 Additionally a def_stmt is added.
2648 a_it = x_t CMP y_t ? b_it : c_it;
2649 a_T = (TYPE) a_it; */
2651 static gimple
2652 vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
2653 tree *type_out)
2655 gimple last_stmt = (*stmts)[0];
2656 tree cond_expr, then_clause, else_clause;
2657 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2658 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2659 machine_mode cmpmode;
2660 gimple pattern_stmt, def_stmt;
2661 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2662 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
2663 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2664 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2665 bool promotion;
2666 tree comp_scalar_type;
2668 if (!is_gimple_assign (last_stmt)
2669 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2670 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2671 return NULL;
2673 cond_expr = gimple_assign_rhs1 (last_stmt);
2674 then_clause = gimple_assign_rhs2 (last_stmt);
2675 else_clause = gimple_assign_rhs3 (last_stmt);
2677 if (!COMPARISON_CLASS_P (cond_expr))
2678 return NULL;
2680 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2681 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2682 if (comp_vectype == NULL_TREE)
2683 return NULL;
2685 type = gimple_expr_type (last_stmt);
2686 if (types_compatible_p (type, comp_scalar_type)
2687 || ((TREE_CODE (then_clause) != INTEGER_CST
2688 || TREE_CODE (else_clause) != INTEGER_CST)
2689 && !INTEGRAL_TYPE_P (comp_scalar_type))
2690 || !INTEGRAL_TYPE_P (type))
2691 return NULL;
2693 if ((TREE_CODE (then_clause) != INTEGER_CST
2694 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2695 &def_stmt0, &promotion))
2696 || (TREE_CODE (else_clause) != INTEGER_CST
2697 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2698 &def_stmt1, &promotion)))
2699 return NULL;
2701 if (orig_type0 && orig_type1
2702 && !types_compatible_p (orig_type0, orig_type1))
2703 return NULL;
2705 if (orig_type0)
2707 if (!types_compatible_p (orig_type0, comp_scalar_type))
2708 return NULL;
2709 then_clause = gimple_assign_rhs1 (def_stmt0);
2710 itype = orig_type0;
2713 if (orig_type1)
2715 if (!types_compatible_p (orig_type1, comp_scalar_type))
2716 return NULL;
2717 else_clause = gimple_assign_rhs1 (def_stmt1);
2718 itype = orig_type1;
2721 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2723 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2724 return NULL;
2726 vectype = get_vectype_for_scalar_type (type);
2727 if (vectype == NULL_TREE)
2728 return NULL;
2730 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2731 return NULL;
2733 if (itype == NULL_TREE)
2734 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2735 TYPE_UNSIGNED (type));
2737 if (itype == NULL_TREE
2738 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2739 return NULL;
2741 vecitype = get_vectype_for_scalar_type (itype);
2742 if (vecitype == NULL_TREE)
2743 return NULL;
2745 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2746 return NULL;
2748 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2750 if ((TREE_CODE (then_clause) == INTEGER_CST
2751 && !int_fits_type_p (then_clause, itype))
2752 || (TREE_CODE (else_clause) == INTEGER_CST
2753 && !int_fits_type_p (else_clause, itype)))
2754 return NULL;
2757 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2758 COND_EXPR, unshare_expr (cond_expr),
2759 fold_convert (itype, then_clause),
2760 fold_convert (itype, else_clause));
2761 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2762 NOP_EXPR, gimple_assign_lhs (def_stmt));
2764 new_pattern_def_seq (stmt_vinfo, def_stmt);
2765 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2766 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2767 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2768 *type_in = vecitype;
2769 *type_out = vectype;
2771 if (dump_enabled_p ())
2772 dump_printf_loc (MSG_NOTE, vect_location,
2773 "vect_recog_mixed_size_cond_pattern: detected:\n");
2775 return pattern_stmt;
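/* Illustrative sketch, not part of GCC: a source-level instance of the
   mixed-size COND_EXPR handled above, assuming 16-bit short operands
   in the comparison and a 32-bit int result.  The pattern performs the
   selection in the comparison's width and widens the result after.  */

static inline int
mixed_size_cond_example (short x_t, short y_t)
{
  return x_t < y_t ? 1 : -1;	/* S1: both constants fit the narrow type */
}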
2779 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2780 true if bool VAR can be optimized that way. */
2782 static bool
2783 check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2785 gimple def_stmt;
2786 enum vect_def_type dt;
2787 tree def, rhs1;
2788 enum tree_code rhs_code;
2790 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2791 &dt))
2792 return false;
2794 if (dt != vect_internal_def)
2795 return false;
2797 if (!is_gimple_assign (def_stmt))
2798 return false;
2800 if (!has_single_use (def))
2801 return false;
2803 rhs1 = gimple_assign_rhs1 (def_stmt);
2804 rhs_code = gimple_assign_rhs_code (def_stmt);
2805 switch (rhs_code)
2807 case SSA_NAME:
2808 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2810 CASE_CONVERT:
2811 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2812 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2813 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2814 return false;
2815 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2817 case BIT_NOT_EXPR:
2818 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
2820 case BIT_AND_EXPR:
2821 case BIT_IOR_EXPR:
2822 case BIT_XOR_EXPR:
2823 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
2824 return false;
2825 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2826 bb_vinfo);
2828 default:
2829 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2831 tree vecitype, comp_vectype;
2833 /* If the comparison can throw, then is_gimple_condexpr will be
2834 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2835 if (stmt_could_throw_p (def_stmt))
2836 return false;
2838 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2839 if (comp_vectype == NULL_TREE)
2840 return false;
2842 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2844 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2845 tree itype
2846 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2847 vecitype = get_vectype_for_scalar_type (itype);
2848 if (vecitype == NULL_TREE)
2849 return false;
2851 else
2852 vecitype = comp_vectype;
2853 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2855 return false;
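/* Illustrative sketch, not part of GCC: a bool chain of the shape
   check_bool_pattern accepts, i.e. single-use comparisons combined
   with bitwise operations and finally widened to an integer.  */

static inline int
bool_chain_example (int x1, int y1, int x2, int y2)
{
  _Bool a_b = x1 < y1;		/* tcc_comparison leaf */
  _Bool b_b = x2 > y2;		/* tcc_comparison leaf */
  _Bool c_b = a_b & b_b;	/* BIT_AND_EXPR of two bools */
  return (int) c_b;		/* widening cast seen by the caller */
}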
2860 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2861 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2862 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2864 static tree
2865 adjust_bool_pattern_cast (tree type, tree var)
2867 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2868 gimple cast_stmt, pattern_stmt;
2870 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2871 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2872 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2873 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2874 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2875 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2876 return gimple_assign_lhs (cast_stmt);
2880 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2881 recursively. VAR is an SSA_NAME that should be transformed from bool
2882 to a wider integer type, OUT_TYPE is the desired final integer type of
2883 the whole pattern, TRUEVAL should be NULL unless optimizing
2884 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2885 in the then_clause, STMTS is where statements with added pattern stmts
2886 should be pushed to. */
2888 static tree
2889 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2890 vec<gimple> *stmts)
2892 gimple stmt = SSA_NAME_DEF_STMT (var);
2893 enum tree_code rhs_code, def_rhs_code;
2894 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2895 location_t loc;
2896 gimple pattern_stmt, def_stmt;
2898 rhs1 = gimple_assign_rhs1 (stmt);
2899 rhs2 = gimple_assign_rhs2 (stmt);
2900 rhs_code = gimple_assign_rhs_code (stmt);
2901 loc = gimple_location (stmt);
2902 switch (rhs_code)
2904 case SSA_NAME:
2905 CASE_CONVERT:
2906 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2907 itype = TREE_TYPE (irhs1);
2908 pattern_stmt
2909 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2910 SSA_NAME, irhs1);
2911 break;
2913 case BIT_NOT_EXPR:
2914 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2915 itype = TREE_TYPE (irhs1);
2916 pattern_stmt
2917 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2918 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
2919 break;
2921 case BIT_AND_EXPR:
2922 /* Try to optimize x = y & (a < b ? 1 : 0); into
2923 x = (a < b ? y : 0);
2925 E.g. for:
2926 bool a_b, b_b, c_b;
2927 TYPE d_T;
2929 S1 a_b = x1 CMP1 y1;
2930 S2 b_b = x2 CMP2 y2;
2931 S3 c_b = a_b & b_b;
2932 S4 d_T = (TYPE) c_b;
2934 we would normally emit:
2936 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2937 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2938 S3' c_T = a_T & b_T;
2939 S4' d_T = c_T;
2941 but we can save one stmt by using the
2942 result of one of the COND_EXPRs in the other COND_EXPR and leave
2943 BIT_AND_EXPR stmt out:
2945 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2946 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2947 S4' f_T = c_T;
2949 At least when VEC_COND_EXPR is implemented using masks,
2950 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
2951 computes the comparison mask and ANDs it, in one case with an
2952 all-ones vector, in the other case with a vector register.
2953 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2954 often more expensive. */
2955 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2956 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2957 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2959 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2960 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2961 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2962 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2964 gimple tstmt;
2965 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2966 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2967 tstmt = stmts->pop ();
2968 gcc_assert (tstmt == def_stmt);
2969 stmts->quick_push (stmt);
2970 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2971 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2972 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2973 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2974 return irhs2;
2976 else
2977 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2978 goto and_ior_xor;
2980 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2981 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2982 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2984 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2985 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2986 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2987 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2989 gimple tstmt;
2990 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2991 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2992 tstmt = stmts->pop ();
2993 gcc_assert (tstmt == def_stmt);
2994 stmts->quick_push (stmt);
2995 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2996 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
2997 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
2998 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2999 return irhs1;
3001 else
3002 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3003 goto and_ior_xor;
3005 /* FALLTHRU */
3006 case BIT_IOR_EXPR:
3007 case BIT_XOR_EXPR:
3008 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3009 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3010 and_ior_xor:
3011 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3012 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3014 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3015 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3016 int out_prec = TYPE_PRECISION (out_type);
3017 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3018 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3019 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3020 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3021 else
3023 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3024 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3027 itype = TREE_TYPE (irhs1);
3028 pattern_stmt
3029 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3030 rhs_code, irhs1, irhs2);
3031 break;
3033 default:
3034 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3035 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3036 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3037 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3038 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3040 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3041 itype
3042 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3044 else
3045 itype = TREE_TYPE (rhs1);
3046 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3047 if (trueval == NULL_TREE)
3048 trueval = build_int_cst (itype, 1);
3049 else
3050 gcc_checking_assert (useless_type_conversion_p (itype,
3051 TREE_TYPE (trueval)));
3052 pattern_stmt
3053 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3054 COND_EXPR, cond_expr, trueval,
3055 build_int_cst (itype, 0));
3056 break;
3059 stmts->safe_push (stmt);
3060 gimple_set_location (pattern_stmt, loc);
3061 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3062 return gimple_assign_lhs (pattern_stmt);
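/* Illustrative sketch, not part of GCC: the BIT_AND_EXPR saving
   described above, written as scalar selects.  Reusing a_T as the
   then-value of the second select makes the separate AND stmt
   unnecessary.  */

static inline int
bool_and_fold_example (int x1, int y1, int x2, int y2)
{
  int a_T = x1 < y1 ? 1 : 0;	/* S1' */
  int c_T = x2 < y2 ? a_T : 0;	/* S3': subsumes b_T and the AND */
  return c_T;			/* S4' */
}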
3066 /* Function vect_recog_bool_pattern
3068 Try to find pattern like following:
3070 bool a_b, b_b, c_b, d_b, e_b;
3071 TYPE f_T;
3072 loop:
3073 S1 a_b = x1 CMP1 y1;
3074 S2 b_b = x2 CMP2 y2;
3075 S3 c_b = a_b & b_b;
3076 S4 d_b = x3 CMP3 y3;
3077 S5 e_b = c_b | d_b;
3078 S6 f_T = (TYPE) e_b;
3080 where type 'TYPE' is an integral type. Or a similar pattern
3081 ending in
3083 S6 f_Y = e_b ? r_Y : s_Y;
3085 as results from if-conversion of a complex condition.
3087 Input:
3089 * LAST_STMT: A stmt at the end from which the pattern
3090 search begins, i.e. cast of a bool to
3091 an integer type.
3093 Output:
3095 * TYPE_IN: The type of the input arguments to the pattern.
3097 * TYPE_OUT: The type of the output of this pattern.
3099 * Return value: A new stmt that will be used to replace the pattern.
3101 Assuming the size of TYPE is the same as the size of all comparisons
3102 (otherwise some casts would be added where needed), for the above
3103 sequence we create related pattern stmts:
3104 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3105 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3106 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3107 S5' e_T = c_T | d_T;
3108 S6' f_T = e_T;
3110 Instead of the above S3' we could emit:
3111 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3112 S3' c_T = a_T | b_T;
3113 but the above is more efficient. */
3115 static gimple
3116 vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
3117 tree *type_out)
3119 gimple last_stmt = stmts->pop ();
3120 enum tree_code rhs_code;
3121 tree var, lhs, rhs, vectype;
3122 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3123 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3124 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
3125 gimple pattern_stmt;
3127 if (!is_gimple_assign (last_stmt))
3128 return NULL;
3130 var = gimple_assign_rhs1 (last_stmt);
3131 lhs = gimple_assign_lhs (last_stmt);
3133 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3134 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3135 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3136 return NULL;
3138 rhs_code = gimple_assign_rhs_code (last_stmt);
3139 if (CONVERT_EXPR_CODE_P (rhs_code))
3141 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3142 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3143 return NULL;
3144 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3145 if (vectype == NULL_TREE)
3146 return NULL;
3148 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3149 return NULL;
3151 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3152 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3153 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3154 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3155 else
3156 pattern_stmt
3157 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3158 *type_out = vectype;
3159 *type_in = vectype;
3160 stmts->safe_push (last_stmt);
3161 if (dump_enabled_p ())
3162 dump_printf_loc (MSG_NOTE, vect_location,
3163 "vect_recog_bool_pattern: detected:\n");
3165 return pattern_stmt;
3167 else if (rhs_code == COND_EXPR
3168 && TREE_CODE (var) == SSA_NAME)
3170 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3171 if (vectype == NULL_TREE)
3172 return NULL;
3174 /* Build a scalar type for the boolean result that when
3175 vectorized matches the vector type of the result in
3176 size and number of elements. */
3177 unsigned prec
3178 = wi::udiv_trunc (TYPE_SIZE (vectype),
3179 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3180 tree type
3181 = build_nonstandard_integer_type (prec,
3182 TYPE_UNSIGNED (TREE_TYPE (var)));
3183 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3184 return NULL;
3186 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3187 return NULL;
3189 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3190 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3191 pattern_stmt
3192 = gimple_build_assign (lhs, COND_EXPR,
3193 build2 (NE_EXPR, boolean_type_node,
3194 rhs, build_int_cst (type, 0)),
3195 gimple_assign_rhs2 (last_stmt),
3196 gimple_assign_rhs3 (last_stmt));
3197 *type_out = vectype;
3198 *type_in = vectype;
3199 stmts->safe_push (last_stmt);
3200 if (dump_enabled_p ())
3201 dump_printf_loc (MSG_NOTE, vect_location,
3202 "vect_recog_bool_pattern: detected:\n");
3204 return pattern_stmt;
3206 else if (rhs_code == SSA_NAME
3207 && STMT_VINFO_DATA_REF (stmt_vinfo))
3209 stmt_vec_info pattern_stmt_info;
3210 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3211 gcc_assert (vectype != NULL_TREE);
3212 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3213 return NULL;
3214 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3215 return NULL;
3217 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3218 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3219 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3221 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3222 gimple cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3223 new_pattern_def_seq (stmt_vinfo, cast_stmt);
3224 rhs = rhs2;
3226 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3227 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3228 bb_vinfo);
3229 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3230 STMT_VINFO_DATA_REF (pattern_stmt_info)
3231 = STMT_VINFO_DATA_REF (stmt_vinfo);
3232 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3233 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3234 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3235 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3236 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3237 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3238 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3239 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3240 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3241 *type_out = vectype;
3242 *type_in = vectype;
3243 stmts->safe_push (last_stmt);
3244 if (dump_enabled_p ())
3245 dump_printf_loc (MSG_NOTE, vect_location,
3246 "vect_recog_bool_pattern: detected:\n");
3247 return pattern_stmt;
3249 else
3250 return NULL;
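/* Illustrative sketch, not part of GCC: the if-converted source shape
   matched by the COND_EXPR arm above, assuming int data; the complex
   condition ends in a select rather than a cast.  */

static inline int
bool_cond_example (int x1, int y1, int x2, int y2, int r_Y, int s_Y)
{
  _Bool e_b = (x1 < y1) & (x2 < y2);	/* complex condition */
  return e_b ? r_Y : s_Y;		/* S6: f_Y = e_b ? r_Y : s_Y */
}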
3254 /* Mark statements that are involved in a pattern. */
3256 static inline void
3257 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3258 tree pattern_vectype)
3260 stmt_vec_info pattern_stmt_info, def_stmt_info;
3261 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3262 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
3263 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
3264 gimple def_stmt;
3266 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3267 if (pattern_stmt_info == NULL)
3269 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3270 bb_vinfo);
3271 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3273 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3275 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3276 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3277 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3278 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3279 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3280 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3281 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3282 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3283 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3285 gimple_stmt_iterator si;
3286 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3287 !gsi_end_p (si); gsi_next (&si))
3289 def_stmt = gsi_stmt (si);
3290 def_stmt_info = vinfo_for_stmt (def_stmt);
3291 if (def_stmt_info == NULL)
3293 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3294 bb_vinfo);
3295 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3297 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3298 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3299 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3300 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3301 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3306 /* Function vect_pattern_recog_1
3308 Input:
3309 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3310 computation pattern.
3311 STMT: A stmt from which the pattern search should start.
3313 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3314 expression that computes the same functionality and can be used to
3315 replace the sequence of stmts that are involved in the pattern.
3317 Output:
3318 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3319 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3320 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3321 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3322 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
3323 fits the available target pattern.
3325 This function also does some bookkeeping, as explained in the documentation
3326 for vect_pattern_recog.  */
3328 static void
3329 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3330 gimple_stmt_iterator si,
3331 vec<gimple> *stmts_to_replace)
3333 gimple stmt = gsi_stmt (si), pattern_stmt;
3334 stmt_vec_info stmt_info;
3335 loop_vec_info loop_vinfo;
3336 tree pattern_vectype;
3337 tree type_in, type_out;
3338 enum tree_code code;
3339 int i;
3340 gimple next;
3342 stmts_to_replace->truncate (0);
3343 stmts_to_replace->quick_push (stmt);
3344 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3345 if (!pattern_stmt)
3346 return;
3348 stmt = stmts_to_replace->last ();
3349 stmt_info = vinfo_for_stmt (stmt);
3350 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3352 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3354 /* No need to check target support (already checked by the pattern
3355 recognition function). */
3356 pattern_vectype = type_out ? type_out : type_in;
3358 else
3360 machine_mode vec_mode;
3361 enum insn_code icode;
3362 optab optab;
3364 /* Check target support */
3365 type_in = get_vectype_for_scalar_type (type_in);
3366 if (!type_in)
3367 return;
3368 if (type_out)
3369 type_out = get_vectype_for_scalar_type (type_out);
3370 else
3371 type_out = type_in;
3372 if (!type_out)
3373 return;
3374 pattern_vectype = type_out;
3376 if (is_gimple_assign (pattern_stmt))
3377 code = gimple_assign_rhs_code (pattern_stmt);
3378 else
3380 gcc_assert (is_gimple_call (pattern_stmt));
3381 code = CALL_EXPR;
3384 optab = optab_for_tree_code (code, type_in, optab_default);
3385 vec_mode = TYPE_MODE (type_in);
3386 if (!optab
3387 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3388 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3389 return;
3392 /* Found a vectorizable pattern. */
3393 if (dump_enabled_p ())
3395 dump_printf_loc (MSG_NOTE, vect_location,
3396 "pattern recognized: ");
3397 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3398 dump_printf (MSG_NOTE, "\n");
3401 /* Mark the stmts that are involved in the pattern. */
3402 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3404 /* Patterns cannot be vectorized using SLP, because they change the order of
3405 computation. */
3406 if (loop_vinfo)
3407 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3408 if (next == stmt)
3409 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3411 /* It is possible that additional pattern stmts are created and inserted in
3412 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3413 relevant statements. */
3414 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3415 && (unsigned) i < (stmts_to_replace->length () - 1);
3416 i++)
3418 stmt_info = vinfo_for_stmt (stmt);
3419 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3420 if (dump_enabled_p ())
3422 dump_printf_loc (MSG_NOTE, vect_location,
3423 "additional pattern stmt: ");
3424 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3425 dump_printf (MSG_NOTE, "\n");
3428 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
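/* Illustrative sketch, not part of GCC: the calling convention every
   vect_recog_*_pattern function used above follows.  The function
   name is hypothetical and the body is only a skeleton.  */

static gimple
vect_recog_example_pattern (vec<gimple> *stmts, tree *type_in,
			    tree *type_out)
{
  gimple last_stmt = stmts->pop ();	/* stmt the search starts from */

  if (!is_gimple_assign (last_stmt))
    return NULL;			/* no match: just return NULL */

  /* On a match: record the types, push the stmt being replaced and
     return the new pattern stmt (NULL here, as this is only a stub).  */
  *type_in = NULL_TREE;
  *type_out = NULL_TREE;
  stmts->safe_push (last_stmt);
  return NULL;
}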
3433 /* Function vect_pattern_recog
3435 Input:
3436 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3437 computation idioms.
3439 Output - for each computation idiom that is detected we create a new stmt
3440 that provides the same functionality and that can be vectorized. We
3441 also record some information in the struct_stmt_info of the relevant
3442 stmts, as explained below:
3444 At the entry to this function we have the following stmts, with the
3445 following initial value in the STMT_VINFO fields:
3447 stmt in_pattern_p related_stmt vec_stmt
3448 S1: a_i = .... - - -
3449 S2: a_2 = ..use(a_i).. - - -
3450 S3: a_1 = ..use(a_2).. - - -
3451 S4: a_0 = ..use(a_1).. - - -
3452 S5: ... = ..use(a_0).. - - -
3454 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3455 represented by a single stmt. We then:
3456 - create a new stmt S6 equivalent to the pattern (the stmt is not
3457 inserted into the code)
3458 - fill in the STMT_VINFO fields as follows:
3460 in_pattern_p related_stmt vec_stmt
3461 S1: a_i = .... - - -
3462 S2: a_2 = ..use(a_i).. - - -
3463 S3: a_1 = ..use(a_2).. - - -
3464 S4: a_0 = ..use(a_1).. true S6 -
3465 '---> S6: a_new = .... - S4 -
3466 S5: ... = ..use(a_0).. - - -
3468 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3469 to each other through the RELATED_STMT field).
3471 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3472 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3473 remain irrelevant unless used by stmts other than S4.
3475 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3476 (because they are marked as irrelevant). It will vectorize S6, and record
3477 a pointer to the new vector stmt VS6 from S6 (as usual).
3478 S4 will be skipped, and S5 will be vectorized as usual:
3480 in_pattern_p related_stmt vec_stmt
3481 S1: a_i = .... - - -
3482 S2: a_2 = ..use(a_i).. - - -
3483 S3: a_1 = ..use(a_2).. - - -
3484 > VS6: va_new = .... - - -
3485 S4: a_0 = ..use(a_1).. true S6 VS6
3486 '---> S6: a_new = .... - S4 VS6
3487 > VS5: ... = ..vuse(va_new).. - - -
3488 S5: ... = ..use(a_0).. - - -
3490 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3491 elsewhere), and we'll end up with:
3493 VS6: va_new = ....
3494 VS5: ... = ..vuse(va_new)..
3496 In case of more than one pattern statements, e.g., widen-mult with
3497 intermediate type:
3499 S1 a_t = ;
3500 S2 a_T = (TYPE) a_t;
3501 '--> S3: a_it = (interm_type) a_t;
3502 S4 prod_T = a_T * CONST;
3503 '--> S5: prod_T' = a_it w* CONST;
3505 there may be other users of a_T outside the pattern. In that case S2 will
3506 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3507 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3508 be recorded in S3. */
3510 void
3511 vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
3513 struct loop *loop;
3514 basic_block *bbs;
3515 unsigned int nbbs;
3516 gimple_stmt_iterator si;
3517 unsigned int i, j;
3518 vect_recog_func_ptr vect_recog_func;
3519 auto_vec<gimple, 1> stmts_to_replace;
3520 gimple stmt;
3522 if (dump_enabled_p ())
3523 dump_printf_loc (MSG_NOTE, vect_location,
3524 "=== vect_pattern_recog ===\n");
3526 if (loop_vinfo)
3528 loop = LOOP_VINFO_LOOP (loop_vinfo);
3529 bbs = LOOP_VINFO_BBS (loop_vinfo);
3530 nbbs = loop->num_nodes;
3532 else
3534 bbs = &BB_VINFO_BB (bb_vinfo);
3535 nbbs = 1;
3538 /* Scan through the loop stmts, applying the pattern recognition
3539 functions starting at each stmt visited: */
3540 for (i = 0; i < nbbs; i++)
3542 basic_block bb = bbs[i];
3543 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3545 if (bb_vinfo && (stmt = gsi_stmt (si))
3546 && vinfo_for_stmt (stmt)
3547 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3548 continue;
3550 /* Scan over all generic vect_recog_xxx_pattern functions. */
3551 for (j = 0; j < NUM_PATTERNS; j++)
3553 vect_recog_func = vect_vect_recog_func_ptrs[j];
3554 vect_pattern_recog_1 (vect_recog_func, si,
3555 &stmts_to_replace);