/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
						 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
					       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					  tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  vect_recog_widen_mult_pattern,
  vect_recog_widen_sum_pattern,
  vect_recog_dot_prod_pattern,
  vect_recog_sad_pattern,
  vect_recog_pow_pattern,
  vect_recog_widen_shift_pattern,
  vect_recog_over_widening_pattern,
  vect_recog_rotate_pattern,
  vect_recog_vector_vector_shift_pattern,
  vect_recog_divmod_pattern,
  vect_recog_mult_pattern,
  vect_recog_mixed_size_cond_pattern,
  vect_recog_bool_pattern,
  vect_recog_mask_conversion_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
}
/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}
/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
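/* For illustration only, a source loop that typically gives rise to this
   pattern (the array element types here are an assumption for the example,
   not taken from this file):

     unsigned char x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += (int) x[i] * (int) y[i];     // S3-S7 above

   On targets with a dot-product instruction the multiply and the widening
   accumulation collapse into one DOT_PROD_EXPR per vector of inputs.  */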
static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
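/* For illustration only, the classic source form of this reduction (the
   element types are an assumption for the example):

     unsigned char x[256], y[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += abs ((int) x[i] - (int) y[i]);   // S3-S8 above

   On targets with a sum-of-absolute-differences instruction (PSADBW-style)
   this becomes a single SAD_EXPR per vector of inputs.  */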
static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Handle widening operation by a constant.  At the moment we support
   MULT_EXPR and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */
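/* A hypothetical example of the intermediate-type case: with a_t of type
   char and TYPE == int,

     a_T = (int) a_t;
     prod_T = a_T * 5000;

   cannot widen directly because 5000 does not fit char, but 5000 does fit a
   16-bit intermediate type, so the promotion is narrowed to
   a_it = (short) a_t; and a short -> int widening multiply is used.  */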
static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
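/* For illustration only, a source loop this pattern matches (element types
   chosen for the example):

     short a[256], b[256];
     int c[256];
     for (int i = 0; i < 256; i++)
       c[i] = (int) a[i] * (int) b[i];   // S3-S5 above

   which vectorizes as widening multiplies (low/high halves) instead of
   unpacking both operands to int and multiplying full-width lanes.  */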
static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree* oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
        x = sqrt (x)
*/
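/* For illustration only, the two recognized forms in source (math.h
   assumed):

     double y0 = pow (x, 2.0);   // becomes  y0 = x * x
     double y1 = pow (x, 0.5);   // becomes  y1 = sqrt (x)

   The sqrt replacement is only emitted when the target provides a
   vectorizable sqrt, which is checked below via vectorizable_function.  */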
static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree fn, base, exp = NULL;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
    return NULL;

  switch (DECL_FUNCTION_CODE (fn))
    {
    case BUILT_IN_POWIF:
    case BUILT_IN_POWI:
    case BUILT_IN_POWF:
    case BUILT_IN_POW:
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
	  && TREE_CODE (exp) != INTEGER_CST)
	return NULL;
      break;

    default:
      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in)
	{
	  gcall *stmt = gimple_build_call (newfn, 1, base);
	  if (vectorizable_function (stmt, *type_in, *type_in)
	      != NULL_TREE)
	    {
	      var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	      gimple_call_set_lhs (stmt, var);
	      return stmt;
	    }
	}
    }

  return NULL;
}
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e - we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
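/* For illustration only, the kind of source loop summed this way (element
   type chosen for the example):

     short x[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += (int) x[i];        // S2 + S3 above

   which becomes a WIDEN_SUM_EXPR reduction, keeping the loads narrow.  */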
static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type (
			TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type (
		      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type (
		      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
	the first statement in the sequence)
	a. The original, HALF_TYPE, is not enough - we replace the promotion
	   from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
	b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
	   promotion.
     2. OPRND is defined by a pattern statement we created.
	a. Its type is not sufficient for the operation, we create a new stmt:
	   a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
	   this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
	b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0 and C1
   are constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
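/* For illustration only, a source fragment that C promotion rules
   over-widen (types chosen for the example):

     unsigned char a[256], b[256];
     for (int i = 0; i < 256; i++)
       b[i] = ((a[i] << 1) | 3) & 0x7f;   // computed in int, stored as char

   The int arithmetic can be performed in a narrower type without changing
   the result, letting more elements fit in each vector.  */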
static gimple *
vect_recog_over_widening_pattern (vec<gimple *> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple *stmt = stmts->pop ();
  gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
	 *use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 case we associate the last pattern statement with the last statement
	 in the sequence.  Therefore, we only add the original statement to
	 the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	  dump_printf (MSG_NOTE, "\n");
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
      dump_printf (MSG_NOTE, "\n");
    }

  return pattern_stmt;
}
/* Detect widening shift pattern:

   type a_t;
   TYPE a_T, res_T;

   S1 a_t = ;
   S2 a_T = (TYPE) a_t;
   S3 res_T = a_T << CONST;

  where type 'TYPE' is at least double the size of type 'type'.

  Also detect cases where the shift result is immediately converted
  to another type 'result_type' that is no larger in size than 'TYPE'.
  In those cases we perform a widen-shift that directly results in
  'result_type', to avoid a possible over-widening situation:

  type a_t;
  TYPE a_T, res_T;
  result_type res_result;

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
  S3 res_T = a_T << CONST;
  S4 res_result = (result_type) res_T;
      '--> res_result' = a_t w<< CONST;

  And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
  create an additional pattern stmt for S2 to create a variable of an
  intermediate type, and perform widen-shift on the intermediate type:

  type a_t;
  interm_type a_it;
  TYPE a_T, res_T, res_T';

  S1 a_t = ;
  S2 a_T = (TYPE) a_t;
      '--> a_it = (interm_type) a_t;
  S3 res_T = a_T << CONST;
      '--> res_T' = a_it <<* CONST;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins.
    In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
    in STMTS.  When an intermediate type is used and a pattern statement is
    created for S2, we also put S2 here (before S3).

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the sequence of
    stmts that constitute the pattern.  In this case it will be:
    WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
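/* For illustration only, a source loop producing this pattern (types chosen
   for the example):

     unsigned short in[256];
     unsigned int out[256];
     for (int i = 0; i < 256; i++)
       out[i] = (unsigned int) in[i] << 7;   // S2 + S3 above

   which becomes a widening shift of the narrow input rather than an unpack
   followed by a full-width shift.  */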
static gimple *
vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
				tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple *pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple *use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  gimple *wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

   type a_t, b_t, c_t;

   S0 a_t = b_t r<< c_t;

  Input/Output:

  * STMTS: Contains a stmt from which the pattern search begins,
    i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
    with a sequence:

    S1 d_t = -c_t;
    S2 e_t = d_t & (B - 1);
    S3 f_t = b_t << c_t;
    S4 g_t = b_t >> e_t;
    S0 a_t = f_t | g_t;

    where B is element bitsize of type.

  Output:

  * TYPE_IN: The type of the input arguments to the pattern.

  * TYPE_OUT: The type of the output of this pattern.

  * Return value: A new stmt that will be used to replace the rotate
    S0 stmt.  */
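/* For illustration only, the expansion above computes, in source form for a
   32-bit unsigned element:

     a = (b << c) | (b >> (-c & 31));   // S1-S4 feeding S0

   The & (B - 1) mask keeps the right-shift amount in range: for c == 0 it
   yields b >> 0 rather than the undefined b >> 32.  */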
static gimple *
vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && is_a <loop_vec_info> (vinfo))
    {
      struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (TYPE_MODE (type))
			    - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask
	= build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
					? LSHIFT_EXPR : RSHIFT_EXPR,
				  oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
					? RSHIFT_EXPR : LSHIFT_EXPR,
				  oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
1965 "vect_recog_rotate_pattern: detected:\n");
1967 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1968 var = vect_recog_temp_ssa_var (type, NULL);
1969 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1971 if (dump_enabled_p ())
1972 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1974 stmts->safe_push (last_stmt);
1975 return pattern_stmt;
1978 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1979 vectorized:
1981 type a_t;
1982 TYPE b_T, res_T;
1984 S1 a_t = ;
1985 S2 b_T = ;
1986 S3 res_T = b_T op a_t;
1988 where type 'TYPE' is a type with a different size than 'type',
1989 and op is <<, >> or rotate.
1991 Also detect cases:
1993 type a_t;
1994 TYPE b_T, c_T, res_T;
1996 S0 c_T = ;
1997 S1 a_t = (type) c_T;
1998 S2 b_T = ;
1999 S3 res_T = b_T op a_t;
2001 Input/Output:
2003 * STMTS: Contains a stmt from which the pattern search begins,
2004 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2005 with a shift/rotate which has the same type on both operands: in the
2006 second case just b_T op c_T, in the first case with an added cast
2007 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2009 Output:
2011 * TYPE_IN: The type of the input arguments to the pattern.
2013 * TYPE_OUT: The type of the output of this pattern.
2015 * Return value: A new stmt that will be used to replace the shift/rotate
2016 S3 stmt. */
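/* An illustrative (assumed) source for this pattern, where the shift
   amount has a narrower type than the shifted value:

     void
     f (long long *x, unsigned char *n, int len)
     {
       int i;
       for (i = 0; i < len; i++)
         x[i] = x[i] << n[i];
     }

   The pattern inserts a cast of the count to the type of the shifted
   operand so both vector operands of the shift have the same element
   size.  */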
2018 static gimple *
2019 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2020 tree *type_in, tree *type_out)
2022 gimple *last_stmt = stmts->pop ();
2023 tree oprnd0, oprnd1, lhs, var;
2024 gimple *pattern_stmt, *def_stmt;
2025 enum tree_code rhs_code;
2026 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2027 vec_info *vinfo = stmt_vinfo->vinfo;
2028 enum vect_def_type dt;
2030 if (!is_gimple_assign (last_stmt))
2031 return NULL;
2033 rhs_code = gimple_assign_rhs_code (last_stmt);
2034 switch (rhs_code)
2036 case LSHIFT_EXPR:
2037 case RSHIFT_EXPR:
2038 case LROTATE_EXPR:
2039 case RROTATE_EXPR:
2040 break;
2041 default:
2042 return NULL;
2045 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2046 return NULL;
2048 lhs = gimple_assign_lhs (last_stmt);
2049 oprnd0 = gimple_assign_rhs1 (last_stmt);
2050 oprnd1 = gimple_assign_rhs2 (last_stmt);
2051 if (TREE_CODE (oprnd0) != SSA_NAME
2052 || TREE_CODE (oprnd1) != SSA_NAME
2053 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2054 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2055 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2056 || TYPE_PRECISION (TREE_TYPE (lhs))
2057 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2058 return NULL;
2060 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2061 return NULL;
2063 if (dt != vect_internal_def)
2064 return NULL;
2066 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2067 *type_out = *type_in;
2068 if (*type_in == NULL_TREE)
2069 return NULL;
2071 tree def = NULL_TREE;
2072 if (gimple_assign_cast_p (def_stmt))
2074 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2075 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2076 && TYPE_PRECISION (TREE_TYPE (rhs1))
2077 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2078 def = rhs1;
2081 if (def == NULL_TREE)
2083 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2084 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2085 new_pattern_def_seq (stmt_vinfo, def_stmt);
2088 /* Pattern detected. */
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_NOTE, vect_location,
2091 "vect_recog_vector_vector_shift_pattern: detected:\n");
2093 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2094 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2095 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2097 if (dump_enabled_p ())
2098 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2100 stmts->safe_push (last_stmt);
2101 return pattern_stmt;
2104 /* Detect multiplication by a constant that is a positive or negative power of 2,
2105 and convert it to a shift pattern.
2107 Mult with a constant that is a positive power of two.
2108 type a_t;
2109 type b_t;
2110 S1: b_t = a_t * n
2114 Mult with a constant that is a negative power of two.
2115 S2: b_t = a_t * -n
2117 Input/Output:
2119 STMTS: Contains a stmt from which the pattern search begins,
2120 i.e. the mult stmt. Convert the mult operation to LSHIFT if
2121 the constant operand is a positive power of 2.
2122 type a_t, b_t;
2123 S1': b_t = a_t << log2 (n)
2125 Convert the mult operation to LSHIFT followed by a NEGATE
2126 if the constant operand is a negative power of 2.
2127 type a_t, b_t, res_T;
2128 S2': b_t = a_t << log2 (n)
2129 S3': res_T = - (b_t)
2131 Output:
2133 * TYPE_IN: The type of the input arguments to the pattern.
2135 * TYPE_OUT: The type of the output of this pattern.
2137 * Return value: A new stmt that will be used to replace the multiplication
2138 S1 or S2 stmt. */
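/* Illustrative (assumed) examples of the rewrite above:

     b_t = a_t * 8;     becomes   b_t = a_t << 3;
     b_t = a_t * -8;    becomes   tmp = a_t << 3;  b_t = -tmp;

   This is only attempted when the target lacks a vector multiply but
   supports vector shifts (and, for the negative case, vector negate).  */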
2140 static gimple *
2141 vect_recog_mult_pattern (vec<gimple *> *stmts,
2142 tree *type_in, tree *type_out)
2144 gimple *last_stmt = stmts->pop ();
2145 tree oprnd0, oprnd1, vectype, itype;
2146 gimple *pattern_stmt, *def_stmt;
2147 optab optab;
2148 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2149 int power2_val, power2_neg_val;
2150 tree shift;
2152 if (!is_gimple_assign (last_stmt))
2153 return NULL;
2155 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2156 return NULL;
2158 oprnd0 = gimple_assign_rhs1 (last_stmt);
2159 oprnd1 = gimple_assign_rhs2 (last_stmt);
2160 itype = TREE_TYPE (oprnd0);
2162 if (TREE_CODE (oprnd0) != SSA_NAME
2163 || TREE_CODE (oprnd1) != INTEGER_CST
2164 || !INTEGRAL_TYPE_P (itype)
2165 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2166 return NULL;
2168 vectype = get_vectype_for_scalar_type (itype);
2169 if (vectype == NULL_TREE)
2170 return NULL;
2172 /* If the target can handle vectorized multiplication natively,
2173 don't attempt to optimize this. */
2174 optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2175 if (optab != unknown_optab)
2177 machine_mode vec_mode = TYPE_MODE (vectype);
2178 int icode = (int) optab_handler (optab, vec_mode);
2179 if (icode != CODE_FOR_nothing)
2180 return NULL;
2183 /* If the target cannot handle vector left shift then we cannot
2184 optimize, so bail out. */
2185 optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2186 if (!optab
2187 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2188 return NULL;
2190 power2_val = wi::exact_log2 (oprnd1);
2191 power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2193 /* Handle constant operands that are positive or negative powers of 2. */
2194 if (power2_val != -1)
2196 shift = build_int_cst (itype, power2_val);
2197 pattern_stmt
2198 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2199 LSHIFT_EXPR, oprnd0, shift);
2201 else if (power2_neg_val != -1)
2203 /* If the target cannot handle vector NEGATE then we cannot
2204 do the optimization. */
2205 optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2206 if (!optab
2207 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2208 return NULL;
2210 shift = build_int_cst (itype, power2_neg_val);
2211 def_stmt
2212 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2213 LSHIFT_EXPR, oprnd0, shift);
2214 new_pattern_def_seq (stmt_vinfo, def_stmt);
2215 pattern_stmt
2216 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2217 NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2219 else
2220 return NULL;
2222 /* Pattern detected. */
2223 if (dump_enabled_p ())
2224 dump_printf_loc (MSG_NOTE, vect_location,
2225 "vect_recog_mult_pattern: detected:\n");
2227 if (dump_enabled_p ())
2228 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2229 pattern_stmt, 0);
2231 stmts->safe_push (last_stmt);
2232 *type_in = vectype;
2233 *type_out = vectype;
2235 return pattern_stmt;
2238 /* Detect a signed division by a constant that wouldn't be
2239 otherwise vectorized:
2241 type a_t, b_t;
2243 S1 a_t = b_t / N;
2245 where type 'type' is an integral type and N is a constant.
2247 Similarly handle modulo by a constant:
2249 S4 a_t = b_t % N;
2251 Input/Output:
2253 * STMTS: Contains a stmt from which the pattern search begins,
2254 i.e. the division stmt. S1 is replaced, if N is a power
2255 of two constant and the type is signed, by:
2256 S3 y_t = b_t < 0 ? N - 1 : 0;
2257 S2 x_t = b_t + y_t;
2258 S1' a_t = x_t >> log2 (N);
2260 S4 is replaced, if N is a power of two constant and the
2261 type is signed, by (where the *_T temporaries have unsigned type):
2262 S9 y_T = b_t < 0 ? -1U : 0U;
2263 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2264 S7 z_t = (type) z_T;
2265 S6 w_t = b_t + z_t;
2266 S5 x_t = w_t & (N - 1);
2267 S4' a_t = x_t - z_t;
2269 Output:
2271 * TYPE_IN: The type of the input arguments to the pattern.
2273 * TYPE_OUT: The type of the output of this pattern.
2275 * Return value: A new stmt that will be used to replace the division
2276 S1 or modulo S4 stmt. */
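/* An illustrative (assumed) scalar equivalent of the S3/S2/S1' rewrite
   above, for type 'int' and N == 16:

     int
     div16 (int b)
     {
       int y = b < 0 ? 15 : 0;
       int x = b + y;
       return x >> 4;
     }

   Adding N - 1 to negative operands before the arithmetic shift gives
   the truncation toward zero that C division requires, which the shift
   alone would not.  */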
2278 static gimple *
2279 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2280 tree *type_in, tree *type_out)
2282 gimple *last_stmt = stmts->pop ();
2283 tree oprnd0, oprnd1, vectype, itype, cond;
2284 gimple *pattern_stmt, *def_stmt;
2285 enum tree_code rhs_code;
2286 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2287 vec_info *vinfo = stmt_vinfo->vinfo;
2288 optab optab;
2289 tree q;
2290 int dummy_int, prec;
2291 stmt_vec_info def_stmt_vinfo;
2293 if (!is_gimple_assign (last_stmt))
2294 return NULL;
2296 rhs_code = gimple_assign_rhs_code (last_stmt);
2297 switch (rhs_code)
2299 case TRUNC_DIV_EXPR:
2300 case TRUNC_MOD_EXPR:
2301 break;
2302 default:
2303 return NULL;
2306 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2307 return NULL;
2309 oprnd0 = gimple_assign_rhs1 (last_stmt);
2310 oprnd1 = gimple_assign_rhs2 (last_stmt);
2311 itype = TREE_TYPE (oprnd0);
2312 if (TREE_CODE (oprnd0) != SSA_NAME
2313 || TREE_CODE (oprnd1) != INTEGER_CST
2314 || TREE_CODE (itype) != INTEGER_TYPE
2315 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2316 return NULL;
2318 vectype = get_vectype_for_scalar_type (itype);
2319 if (vectype == NULL_TREE)
2320 return NULL;
2322 /* If the target can handle vectorized division or modulo natively,
2323 don't attempt to optimize this. */
2324 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2325 if (optab != unknown_optab)
2327 machine_mode vec_mode = TYPE_MODE (vectype);
2328 int icode = (int) optab_handler (optab, vec_mode);
2329 if (icode != CODE_FOR_nothing)
2330 return NULL;
2333 prec = TYPE_PRECISION (itype);
2334 if (integer_pow2p (oprnd1))
2336 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2337 return NULL;
2339 /* Pattern detected. */
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_NOTE, vect_location,
2342 "vect_recog_divmod_pattern: detected:\n");
2344 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2345 build_int_cst (itype, 0));
2346 if (rhs_code == TRUNC_DIV_EXPR)
2348 tree var = vect_recog_temp_ssa_var (itype, NULL);
2349 tree shift;
2350 def_stmt
2351 = gimple_build_assign (var, COND_EXPR, cond,
2352 fold_build2 (MINUS_EXPR, itype, oprnd1,
2353 build_int_cst (itype, 1)),
2354 build_int_cst (itype, 0));
2355 new_pattern_def_seq (stmt_vinfo, def_stmt);
2356 var = vect_recog_temp_ssa_var (itype, NULL);
2357 def_stmt
2358 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2359 gimple_assign_lhs (def_stmt));
2360 append_pattern_def_seq (stmt_vinfo, def_stmt);
2362 shift = build_int_cst (itype, tree_log2 (oprnd1));
2363 pattern_stmt
2364 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2365 RSHIFT_EXPR, var, shift);
2367 else
2369 tree signmask;
2370 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2371 if (compare_tree_int (oprnd1, 2) == 0)
2373 signmask = vect_recog_temp_ssa_var (itype, NULL);
2374 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2375 build_int_cst (itype, 1),
2376 build_int_cst (itype, 0));
2377 append_pattern_def_seq (stmt_vinfo, def_stmt);
2379 else
2381 tree utype
2382 = build_nonstandard_integer_type (prec, 1);
2383 tree vecutype = get_vectype_for_scalar_type (utype);
2384 tree shift
2385 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2386 - tree_log2 (oprnd1));
2387 tree var = vect_recog_temp_ssa_var (utype, NULL);
2389 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2390 build_int_cst (utype, -1),
2391 build_int_cst (utype, 0));
2392 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2393 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2394 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2395 append_pattern_def_seq (stmt_vinfo, def_stmt);
2396 var = vect_recog_temp_ssa_var (utype, NULL);
2397 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2398 gimple_assign_lhs (def_stmt),
2399 shift);
2400 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2401 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2402 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2403 append_pattern_def_seq (stmt_vinfo, def_stmt);
2404 signmask = vect_recog_temp_ssa_var (itype, NULL);
2405 def_stmt
2406 = gimple_build_assign (signmask, NOP_EXPR, var);
2407 append_pattern_def_seq (stmt_vinfo, def_stmt);
2409 def_stmt
2410 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2411 PLUS_EXPR, oprnd0, signmask);
2412 append_pattern_def_seq (stmt_vinfo, def_stmt);
2413 def_stmt
2414 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2415 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2416 fold_build2 (MINUS_EXPR, itype, oprnd1,
2417 build_int_cst (itype, 1)));
2418 append_pattern_def_seq (stmt_vinfo, def_stmt);
2420 pattern_stmt
2421 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2422 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2423 signmask);
2426 if (dump_enabled_p ())
2427 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2430 stmts->safe_push (last_stmt);
2432 *type_in = vectype;
2433 *type_out = vectype;
2434 return pattern_stmt;
2437 if (prec > HOST_BITS_PER_WIDE_INT
2438 || integer_zerop (oprnd1))
2439 return NULL;
2441 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2442 return NULL;
2444 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2446 if (TYPE_UNSIGNED (itype))
2448 unsigned HOST_WIDE_INT mh, ml;
2449 int pre_shift, post_shift;
2450 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2451 & GET_MODE_MASK (TYPE_MODE (itype)));
2452 tree t1, t2, t3, t4;
2454 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2455 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2456 return NULL;
2458 /* Find a suitable multiplier and right shift count
2459 instead of multiplying with D. */
2460 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2462 /* If the suggested multiplier is more than SIZE bits, we can do better
2463 for even divisors, using an initial right shift. */
2464 if (mh != 0 && (d & 1) == 0)
2466 pre_shift = floor_log2 (d & -d);
2467 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2468 &ml, &post_shift, &dummy_int);
2469 gcc_assert (!mh);
2471 else
2472 pre_shift = 0;
2474 if (mh != 0)
2476 if (post_shift - 1 >= prec)
2477 return NULL;
2479 /* t1 = oprnd0 h* ml;
2480 t2 = oprnd0 - t1;
2481 t3 = t2 >> 1;
2482 t4 = t1 + t3;
2483 q = t4 >> (post_shift - 1); */
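/* A worked example for illustration: for d = 7 and prec = 32,
   choose_multiplier yields ml = 0x24924925 with post_shift = 3, so the
   sequence built below computes

     t1 = (x * 0x24924925ULL) >> 32;
     t2 = x - t1;
     t3 = t2 >> 1;
     t4 = t1 + t3;
     q  = t4 >> 2;

   which equals x / 7 for every 32-bit unsigned x.  */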
2484 t1 = vect_recog_temp_ssa_var (itype, NULL);
2485 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2486 build_int_cst (itype, ml));
2487 append_pattern_def_seq (stmt_vinfo, def_stmt);
2489 t2 = vect_recog_temp_ssa_var (itype, NULL);
2490 def_stmt
2491 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2492 append_pattern_def_seq (stmt_vinfo, def_stmt);
2494 t3 = vect_recog_temp_ssa_var (itype, NULL);
2495 def_stmt
2496 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2497 append_pattern_def_seq (stmt_vinfo, def_stmt);
2499 t4 = vect_recog_temp_ssa_var (itype, NULL);
2500 def_stmt
2501 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2503 if (post_shift != 1)
2505 append_pattern_def_seq (stmt_vinfo, def_stmt);
2507 q = vect_recog_temp_ssa_var (itype, NULL);
2508 pattern_stmt
2509 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2510 build_int_cst (itype, post_shift - 1));
2512 else
2514 q = t4;
2515 pattern_stmt = def_stmt;
2518 else
2520 if (pre_shift >= prec || post_shift >= prec)
2521 return NULL;
2523 /* t1 = oprnd0 >> pre_shift;
2524 t2 = t1 h* ml;
2525 q = t2 >> post_shift; */
2526 if (pre_shift)
2528 t1 = vect_recog_temp_ssa_var (itype, NULL);
2529 def_stmt
2530 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2531 build_int_cst (NULL, pre_shift));
2532 append_pattern_def_seq (stmt_vinfo, def_stmt);
2534 else
2535 t1 = oprnd0;
2537 t2 = vect_recog_temp_ssa_var (itype, NULL);
2538 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2539 build_int_cst (itype, ml));
2541 if (post_shift)
2543 append_pattern_def_seq (stmt_vinfo, def_stmt);
2545 q = vect_recog_temp_ssa_var (itype, NULL);
2546 def_stmt
2547 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2548 build_int_cst (itype, post_shift));
2550 else
2551 q = t2;
2553 pattern_stmt = def_stmt;
2556 else
2558 unsigned HOST_WIDE_INT ml;
2559 int post_shift;
2560 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2561 unsigned HOST_WIDE_INT abs_d;
2562 bool add = false;
2563 tree t1, t2, t3, t4;
2565 /* Give up for -1. */
2566 if (d == -1)
2567 return NULL;
2569 /* Since d might be INT_MIN, we have to cast to
2570 unsigned HOST_WIDE_INT before negating to avoid
2571 undefined signed overflow. */
2572 abs_d = (d >= 0
2573 ? (unsigned HOST_WIDE_INT) d
2574 : - (unsigned HOST_WIDE_INT) d);
2576 /* n rem d = n rem -d */
2577 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2579 d = abs_d;
2580 oprnd1 = build_int_cst (itype, abs_d);
2582 else if (HOST_BITS_PER_WIDE_INT >= prec
2583 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2584 /* This case is not handled correctly below. */
2585 return NULL;
2587 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2588 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2590 add = true;
2591 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2593 if (post_shift >= prec)
2594 return NULL;
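/* A worked example for illustration: for d = 3 and prec = 32,
   choose_multiplier yields ml = 0x55555556 with post_shift = 0, and
   since ml < 2^31, ADD stays false, so the code below computes

     t1 = ((long long) x * 0x55555556) >> 32;
     q  = t1 - (x >> 31);

   where subtracting the sign bit rounds the quotient toward zero.  */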
2596 /* t1 = oprnd0 h* ml; */
2597 t1 = vect_recog_temp_ssa_var (itype, NULL);
2598 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2599 build_int_cst (itype, ml));
2601 if (add)
2603 /* t2 = t1 + oprnd0; */
2604 append_pattern_def_seq (stmt_vinfo, def_stmt);
2605 t2 = vect_recog_temp_ssa_var (itype, NULL);
2606 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2608 else
2609 t2 = t1;
2611 if (post_shift)
2613 /* t3 = t2 >> post_shift; */
2614 append_pattern_def_seq (stmt_vinfo, def_stmt);
2615 t3 = vect_recog_temp_ssa_var (itype, NULL);
2616 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2617 build_int_cst (itype, post_shift));
2619 else
2620 t3 = t2;
2622 wide_int oprnd0_min, oprnd0_max;
2623 int msb = 1;
2624 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2626 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2627 msb = 0;
2628 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2629 msb = -1;
2632 if (msb == 0 && d >= 0)
2634 /* q = t3; */
2635 q = t3;
2636 pattern_stmt = def_stmt;
2638 else
2640 /* t4 = oprnd0 >> (prec - 1);
2641 or if we know from VRP that oprnd0 >= 0
2642 t4 = 0;
2643 or if we know from VRP that oprnd0 < 0
2644 t4 = -1; */
2645 append_pattern_def_seq (stmt_vinfo, def_stmt);
2646 t4 = vect_recog_temp_ssa_var (itype, NULL);
2647 if (msb != 1)
2648 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2649 build_int_cst (itype, msb));
2650 else
2651 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2652 build_int_cst (itype, prec - 1));
2653 append_pattern_def_seq (stmt_vinfo, def_stmt);
2655 /* q = t3 - t4; or q = t4 - t3; */
2656 q = vect_recog_temp_ssa_var (itype, NULL);
2657 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2658 d < 0 ? t3 : t4);
2662 if (rhs_code == TRUNC_MOD_EXPR)
2664 tree r, t1;
2666 /* We divided. Now finish by:
2667 t1 = q * oprnd1;
2668 r = oprnd0 - t1; */
2669 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2671 t1 = vect_recog_temp_ssa_var (itype, NULL);
2672 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2673 append_pattern_def_seq (stmt_vinfo, def_stmt);
2675 r = vect_recog_temp_ssa_var (itype, NULL);
2676 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2679 /* Pattern detected. */
2680 if (dump_enabled_p ())
2682 dump_printf_loc (MSG_NOTE, vect_location,
2683 "vect_recog_divmod_pattern: detected: ");
2684 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2685 dump_printf (MSG_NOTE, "\n");
2688 stmts->safe_push (last_stmt);
2690 *type_in = vectype;
2691 *type_out = vectype;
2692 return pattern_stmt;
2695 /* Function vect_recog_mixed_size_cond_pattern
2697 Try to find the following pattern:
2699 type x_t, y_t;
2700 TYPE a_T, b_T, c_T;
2701 loop:
2702 S1 a_T = x_t CMP y_t ? b_T : c_T;
2704 where type 'TYPE' is an integral type which has a different size
2705 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2706 than 'type', the constants need to fit into an integer type
2707 with the same width as 'type') or results of a conversion from 'type'.
2709 Input:
2711 * LAST_STMT: A stmt from which the pattern search begins.
2713 Output:
2715 * TYPE_IN: The type of the input arguments to the pattern.
2717 * TYPE_OUT: The type of the output of this pattern.
2719 * Return value: A new stmt that will be used to replace the pattern.
2720 Additionally a def_stmt is added.
2722 a_it = x_t CMP y_t ? b_it : c_it;
2723 a_T = (TYPE) a_it; */
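/* An illustrative (assumed) source for this pattern, with 'type' being
   short and 'TYPE' being int:

     void
     f (int *a, short *x, short *y, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = x[i] < y[i] ? -1 : 17;
     }

   The COND_EXPR is performed on an intermediate short (a_it above), so
   the comparison and the selection use vectors with the same number of
   elements, leaving only the final widening to int.  */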
2725 static gimple *
2726 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
2727 tree *type_out)
2729 gimple *last_stmt = (*stmts)[0];
2730 tree cond_expr, then_clause, else_clause;
2731 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2732 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2733 gimple *pattern_stmt, *def_stmt;
2734 vec_info *vinfo = stmt_vinfo->vinfo;
2735 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2736 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
2737 bool promotion;
2738 tree comp_scalar_type;
2740 if (!is_gimple_assign (last_stmt)
2741 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2742 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2743 return NULL;
2745 cond_expr = gimple_assign_rhs1 (last_stmt);
2746 then_clause = gimple_assign_rhs2 (last_stmt);
2747 else_clause = gimple_assign_rhs3 (last_stmt);
2749 if (!COMPARISON_CLASS_P (cond_expr))
2750 return NULL;
2752 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2753 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2754 if (comp_vectype == NULL_TREE)
2755 return NULL;
2757 type = gimple_expr_type (last_stmt);
2758 if (types_compatible_p (type, comp_scalar_type)
2759 || ((TREE_CODE (then_clause) != INTEGER_CST
2760 || TREE_CODE (else_clause) != INTEGER_CST)
2761 && !INTEGRAL_TYPE_P (comp_scalar_type))
2762 || !INTEGRAL_TYPE_P (type))
2763 return NULL;
2765 if ((TREE_CODE (then_clause) != INTEGER_CST
2766 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2767 &def_stmt0, &promotion))
2768 || (TREE_CODE (else_clause) != INTEGER_CST
2769 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2770 &def_stmt1, &promotion)))
2771 return NULL;
2773 if (orig_type0 && orig_type1
2774 && !types_compatible_p (orig_type0, orig_type1))
2775 return NULL;
2777 if (orig_type0)
2779 if (!types_compatible_p (orig_type0, comp_scalar_type))
2780 return NULL;
2781 then_clause = gimple_assign_rhs1 (def_stmt0);
2782 itype = orig_type0;
2785 if (orig_type1)
2787 if (!types_compatible_p (orig_type1, comp_scalar_type))
2788 return NULL;
2789 else_clause = gimple_assign_rhs1 (def_stmt1);
2790 itype = orig_type1;
2794 HOST_WIDE_INT cmp_mode_size
2795 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2797 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
2798 return NULL;
2800 vectype = get_vectype_for_scalar_type (type);
2801 if (vectype == NULL_TREE)
2802 return NULL;
2804 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2805 return NULL;
2807 if (itype == NULL_TREE)
2808 itype = build_nonstandard_integer_type (cmp_mode_size,
2809 TYPE_UNSIGNED (type));
2811 if (itype == NULL_TREE
2812 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
2813 return NULL;
2815 vecitype = get_vectype_for_scalar_type (itype);
2816 if (vecitype == NULL_TREE)
2817 return NULL;
2819 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2820 return NULL;
2822 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
2824 if ((TREE_CODE (then_clause) == INTEGER_CST
2825 && !int_fits_type_p (then_clause, itype))
2826 || (TREE_CODE (else_clause) == INTEGER_CST
2827 && !int_fits_type_p (else_clause, itype)))
2828 return NULL;
2831 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2832 COND_EXPR, unshare_expr (cond_expr),
2833 fold_convert (itype, then_clause),
2834 fold_convert (itype, else_clause));
2835 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2836 NOP_EXPR, gimple_assign_lhs (def_stmt));
2838 new_pattern_def_seq (stmt_vinfo, def_stmt);
2839 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
2840 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2841 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2842 *type_in = vecitype;
2843 *type_out = vectype;
2845 if (dump_enabled_p ())
2846 dump_printf_loc (MSG_NOTE, vect_location,
2847 "vect_recog_mixed_size_cond_pattern: detected:\n");
2849 return pattern_stmt;
2853 /* Helper function of vect_recog_bool_pattern. Called recursively; returns
2854 true if bool VAR can and should be optimized that way. Assume it shouldn't
2855 be in case it's the result of a comparison that can be directly vectorized into
2856 a vector comparison. */
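/* For example (illustrative), given

     b_1 = x_2 < y_3;
     b_4 = z_5 != w_6;
     b_7 = b_1 & b_4;

   calling this on b_7 recurses through the BIT_AND_EXPR into both
   comparison defs and reports whether each can be rewritten as a
   COND_EXPR producing 0/1 integer values of a suitable width.  */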
2858 static bool
2859 check_bool_pattern (tree var, vec_info *vinfo)
2861 gimple *def_stmt;
2862 enum vect_def_type dt;
2863 tree rhs1;
2864 enum tree_code rhs_code;
2866 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
2867 return false;
2869 if (dt != vect_internal_def)
2870 return false;
2872 if (!is_gimple_assign (def_stmt))
2873 return false;
2875 if (!has_single_use (var))
2876 return false;
2878 rhs1 = gimple_assign_rhs1 (def_stmt);
2879 rhs_code = gimple_assign_rhs_code (def_stmt);
2880 switch (rhs_code)
2882 case SSA_NAME:
2883 return check_bool_pattern (rhs1, vinfo);
2885 CASE_CONVERT:
2886 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2887 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2888 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2889 return false;
2890 return check_bool_pattern (rhs1, vinfo);
2892 case BIT_NOT_EXPR:
2893 return check_bool_pattern (rhs1, vinfo);
2895 case BIT_AND_EXPR:
2896 case BIT_IOR_EXPR:
2897 case BIT_XOR_EXPR:
2898 if (!check_bool_pattern (rhs1, vinfo))
2899 return false;
2900 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
2902 default:
2903 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2905 tree vecitype, comp_vectype, mask_type;
2907 /* If the comparison can throw, then is_gimple_condexpr will be
2908 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2909 if (stmt_could_throw_p (def_stmt))
2910 return false;
2912 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2913 if (comp_vectype == NULL_TREE)
2914 return false;
2916 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
2917 if (mask_type
2918 && expand_vec_cmp_expr_p (comp_vectype, mask_type))
2919 return false;
2921 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2923 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2924 tree itype
2925 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2926 vecitype = get_vectype_for_scalar_type (itype);
2927 if (vecitype == NULL_TREE)
2928 return false;
2930 else
2931 vecitype = comp_vectype;
2932 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2934 return false;
2939 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2940 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2941 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2943 static tree
2944 adjust_bool_pattern_cast (tree type, tree var)
2946 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2947 gimple *cast_stmt, *pattern_stmt;
2949 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2950 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2951 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2952 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2953 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2954 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2955 return gimple_assign_lhs (cast_stmt);
2959 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2960 recursively. VAR is an SSA_NAME that should be transformed from bool
2961 to a wider integer type, OUT_TYPE is the desired final integer type of
2962 the whole pattern, TRUEVAL should be NULL unless optimizing
2963 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2964 in the then_clause, STMTS is where statements with added pattern stmts
2965 should be pushed to. */
2967 static tree
2968 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2969 vec<gimple *> *stmts)
2971 gimple *stmt = SSA_NAME_DEF_STMT (var);
2972 enum tree_code rhs_code, def_rhs_code;
2973 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2974 location_t loc;
2975 gimple *pattern_stmt, *def_stmt;
2977 rhs1 = gimple_assign_rhs1 (stmt);
2978 rhs2 = gimple_assign_rhs2 (stmt);
2979 rhs_code = gimple_assign_rhs_code (stmt);
2980 loc = gimple_location (stmt);
2981 switch (rhs_code)
2983 case SSA_NAME:
2984 CASE_CONVERT:
2985 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2986 itype = TREE_TYPE (irhs1);
2987 pattern_stmt
2988 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2989 SSA_NAME, irhs1);
2990 break;
2992 case BIT_NOT_EXPR:
2993 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2994 itype = TREE_TYPE (irhs1);
2995 pattern_stmt
2996 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2997 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
2998 break;
3000 case BIT_AND_EXPR:
3001 /* Try to optimize x = y & (a < b ? 1 : 0); into
3002 x = (a < b ? y : 0);
3004 E.g. for:
3005 bool a_b, b_b, c_b;
3006 TYPE d_T;
3008 S1 a_b = x1 CMP1 y1;
3009 S2 b_b = x2 CMP2 y2;
3010 S3 c_b = a_b & b_b;
3011 S4 d_T = (TYPE) c_b;
3013 we would normally emit:
3015 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3016 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3017 S3' c_T = a_T & b_T;
3018 S4' d_T = c_T;
3020 but we can save one stmt by using the
3021 result of one of the COND_EXPRs in the other COND_EXPR and leave
3022 BIT_AND_EXPR stmt out:
3024 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3025 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3026 S4' f_T = c_T;
3028 At least when VEC_COND_EXPR is implemented using masks,
3029 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3030 computes the comparison mask and ANDs it, in one case with an
3031 all-ones vector, in the other case with a vector register.
3032 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3033 often more expensive. */
3034 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3035 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3036 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3038 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3039 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3040 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3041 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3043 gimple *tstmt;
3044 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3045 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3046 tstmt = stmts->pop ();
3047 gcc_assert (tstmt == def_stmt);
3048 stmts->quick_push (stmt);
3049 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3050 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3051 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3052 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3053 return irhs2;
3055 else
3056 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3057 goto and_ior_xor;
3059 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3060 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3061 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3063 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3064 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3065 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3066 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3068 gimple *tstmt;
3069 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3070 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3071 tstmt = stmts->pop ();
3072 gcc_assert (tstmt == def_stmt);
3073 stmts->quick_push (stmt);
3074 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3075 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3076 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3077 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3078 return irhs1;
3080 else
3081 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3082 goto and_ior_xor;
3084 /* FALLTHRU */
3085 case BIT_IOR_EXPR:
3086 case BIT_XOR_EXPR:
3087 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3088 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3089 and_ior_xor:
3090 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3091 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3093 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3094 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3095 int out_prec = TYPE_PRECISION (out_type);
3096 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3097 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3098 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3099 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3100 else
3102 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3103 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3106 itype = TREE_TYPE (irhs1);
3107 pattern_stmt
3108 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3109 rhs_code, irhs1, irhs2);
3110 break;
3112 default:
3113 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3114 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3115 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3116 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3117 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3119 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3120 itype
3121 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3123 else
3124 itype = TREE_TYPE (rhs1);
3125 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3126 if (trueval == NULL_TREE)
3127 trueval = build_int_cst (itype, 1);
3128 else
3129 gcc_checking_assert (useless_type_conversion_p (itype,
3130 TREE_TYPE (trueval)));
3131 pattern_stmt
3132 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3133 COND_EXPR, cond_expr, trueval,
3134 build_int_cst (itype, 0));
3135 break;
3138 stmts->safe_push (stmt);
3139 gimple_set_location (pattern_stmt, loc);
3140 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3141 return gimple_assign_lhs (pattern_stmt);
3145 /* Return the proper type for converting bool VAR into
3146 an integer value or NULL_TREE if no such type exists.
3147 The type is chosen so that the converted value has the
3148 same number of elements as VAR's vector type. */
3150 static tree
3151 search_type_for_mask (tree var, vec_info *vinfo)
3153 gimple *def_stmt;
3154 enum vect_def_type dt;
3155 tree rhs1;
3156 enum tree_code rhs_code;
3157 tree res = NULL_TREE, res2;
3159 if (TREE_CODE (var) != SSA_NAME)
3160 return NULL_TREE;
3162 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3163 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3164 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3165 return NULL_TREE;
3167 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3168 return NULL_TREE;
3170 if (dt != vect_internal_def)
3171 return NULL_TREE;
3173 if (!is_gimple_assign (def_stmt))
3174 return NULL_TREE;
3176 rhs_code = gimple_assign_rhs_code (def_stmt);
3177 rhs1 = gimple_assign_rhs1 (def_stmt);
3179 switch (rhs_code)
3181 case SSA_NAME:
3182 case BIT_NOT_EXPR:
3183 CASE_CONVERT:
3184 res = search_type_for_mask (rhs1, vinfo);
3185 break;
3187 case BIT_AND_EXPR:
3188 case BIT_IOR_EXPR:
3189 case BIT_XOR_EXPR:
3190 res = search_type_for_mask (rhs1, vinfo);
3191 res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
3192 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3193 res = res2;
3194 break;
3196 default:
3197 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3199 tree comp_vectype, mask_type;
3201 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3202 if (comp_vectype == NULL_TREE)
3203 return NULL_TREE;
3205 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3206 if (!mask_type
3207 || !expand_vec_cmp_expr_p (comp_vectype, mask_type))
3208 return NULL_TREE;
3210 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3211 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3213 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3214 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3216 else
3217 res = TREE_TYPE (rhs1);
3221 return res;
3225 /* Function vect_recog_bool_pattern
3227 Try to find pattern like following:
3229 bool a_b, b_b, c_b, d_b, e_b;
3230 TYPE f_T;
3231 loop:
3232 S1 a_b = x1 CMP1 y1;
3233 S2 b_b = x2 CMP2 y2;
3234 S3 c_b = a_b & b_b;
3235 S4 d_b = x3 CMP3 y3;
3236 S5 e_b = c_b | d_b;
3237 S6 f_T = (TYPE) e_b;
3239 where type 'TYPE' is an integral type. Or a similar pattern
3240 ending in
3242 S6 f_Y = e_b ? r_Y : s_Y;
3244 as results from if-conversion of a complex condition.
3246 Input:
3248 * LAST_STMT: A stmt at the end from which the pattern
3249 search begins, i.e. cast of a bool to
3250 an integer type.
3252 Output:
3254 * TYPE_IN: The type of the input arguments to the pattern.
3256 * TYPE_OUT: The type of the output of this pattern.
3258 * Return value: A new stmt that will be used to replace the pattern.
3260 Assuming the size of TYPE is the same as the size of all comparisons
3261 (otherwise some casts would be added where needed), for the above
3262 sequence we create related pattern stmts:
3263 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3264 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3265 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3266 S5' e_T = c_T | d_T;
3267 S6' f_T = e_T;
3269 Instead of the above S3' we could emit:
3270 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3271 S3' c_T = a_T | b_T;
3272 but the above is more efficient. */
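/* An illustrative (assumed) C source leading to the S1-S6 sequence
   above:

     void
     f (int *out, int *x1, int *y1, int *x2, int *y2, int *x3, int *y3)
     {
       int i;
       for (i = 0; i < 64; i++)
         out[i] = (x1[i] < y1[i] && x2[i] < y2[i]) || x3[i] < y3[i];
     }

   If-conversion turns the short-circuit operators into the bitwise
   a_b & b_b and c_b | d_b statements that this function looks for.  */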
3274 static gimple *
3275 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3276 tree *type_out)
3278 gimple *last_stmt = stmts->pop ();
3279 enum tree_code rhs_code;
3280 tree var, lhs, rhs, vectype;
3281 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3282 stmt_vec_info new_stmt_info;
3283 vec_info *vinfo = stmt_vinfo->vinfo;
3284 gimple *pattern_stmt;
3286 if (!is_gimple_assign (last_stmt))
3287 return NULL;
3289 var = gimple_assign_rhs1 (last_stmt);
3290 lhs = gimple_assign_lhs (last_stmt);
3292 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3293 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3294 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3295 return NULL;
3297 rhs_code = gimple_assign_rhs_code (last_stmt);
3298 if (CONVERT_EXPR_CODE_P (rhs_code))
3300 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3301 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3302 return NULL;
3303 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3304 if (vectype == NULL_TREE)
3305 return NULL;
3307 if (check_bool_pattern (var, vinfo))
3309 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3310 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3311 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3312 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3313 else
3314 pattern_stmt
3315 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3317 else
3319 tree type = search_type_for_mask (var, vinfo);
3320 tree cst0, cst1, tmp;
3322 if (!type)
3323 return NULL;
3325 /* We may directly use the cond with a narrowed type, to avoid
3326 multiple cond exprs with subsequent result packing, and
3327 perform a single cond with a packed mask instead. In case
3328 of widening it is better to do the cond first and then extract
3329 the results. */
3330 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3331 type = TREE_TYPE (lhs);
3333 cst0 = build_int_cst (type, 0);
3334 cst1 = build_int_cst (type, 1);
3335 tmp = vect_recog_temp_ssa_var (type, NULL);
3336 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3338 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3340 tree new_vectype = get_vectype_for_scalar_type (type);
3341 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3342 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3343 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3344 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3346 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3347 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3351 *type_out = vectype;
3352 *type_in = vectype;
3353 stmts->safe_push (last_stmt);
3354 if (dump_enabled_p ())
3355 dump_printf_loc (MSG_NOTE, vect_location,
3356 "vect_recog_bool_pattern: detected:\n");
3358 return pattern_stmt;
3360 else if (rhs_code == COND_EXPR
3361 && TREE_CODE (var) == SSA_NAME)
3363 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3364 if (vectype == NULL_TREE)
3365 return NULL;
3367 /* Build a scalar type for the boolean result that when
3368 vectorized matches the vector type of the result in
3369 size and number of elements. */
3370 unsigned prec
3371 = wi::udiv_trunc (TYPE_SIZE (vectype),
3372 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3373 tree type
3374 = build_nonstandard_integer_type (prec,
3375 TYPE_UNSIGNED (TREE_TYPE (var)));
3376 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3377 return NULL;
3379 if (!check_bool_pattern (var, vinfo))
3380 return NULL;
3382 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3384 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3385 pattern_stmt
3386 = gimple_build_assign (lhs, COND_EXPR,
3387 build2 (NE_EXPR, boolean_type_node,
3388 rhs, build_int_cst (type, 0)),
3389 gimple_assign_rhs2 (last_stmt),
3390 gimple_assign_rhs3 (last_stmt));
3391 *type_out = vectype;
3392 *type_in = vectype;
3393 stmts->safe_push (last_stmt);
3394 if (dump_enabled_p ())
3395 dump_printf_loc (MSG_NOTE, vect_location,
3396 "vect_recog_bool_pattern: detected:\n");
3398 return pattern_stmt;
3400 else if (rhs_code == SSA_NAME
3401 && STMT_VINFO_DATA_REF (stmt_vinfo))
3403 stmt_vec_info pattern_stmt_info;
3404 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3405 gcc_assert (vectype != NULL_TREE);
3406 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3407 return NULL;
3409 if (check_bool_pattern (var, vinfo))
3410 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype),
3411 NULL_TREE, stmts);
3412 else
3414 tree type = search_type_for_mask (var, vinfo);
3415 tree cst0, cst1, new_vectype;
3417 if (!type)
3418 return NULL;
3420 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3421 type = TREE_TYPE (vectype);
3423 cst0 = build_int_cst (type, 0);
3424 cst1 = build_int_cst (type, 1);
3425 new_vectype = get_vectype_for_scalar_type (type);
3427 rhs = vect_recog_temp_ssa_var (type, NULL);
3428 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3430 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3431 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3432 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3433 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3436 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3437 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3439 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3440 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3441 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3442 rhs = rhs2;
3444 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3445 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3446 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3447 STMT_VINFO_DATA_REF (pattern_stmt_info)
3448 = STMT_VINFO_DATA_REF (stmt_vinfo);
3449 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3450 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3451 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3452 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3453 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3454 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3455 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3456 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3457 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3458 *type_out = vectype;
3459 *type_in = vectype;
3460 stmts->safe_push (last_stmt);
3461 if (dump_enabled_p ())
3462 dump_printf_loc (MSG_NOTE, vect_location,
3463 "vect_recog_bool_pattern: detected:\n");
3464 return pattern_stmt;
3466 else
3467 return NULL;
3471 /* A helper for vect_recog_mask_conversion_pattern. Build a
3472 conversion of MASK to a type suitable for masking VECTYPE.
3473 The built statement gets the required vectype and is appended to
3474 the pattern sequence of STMT_VINFO.
3476 Return the converted mask. */
3478 static tree
3479 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3480 vec_info *vinfo)
3482 gimple *stmt;
3483 tree masktype, tmp;
3484 stmt_vec_info new_stmt_info;
3486 masktype = build_same_sized_truth_vector_type (vectype);
3487 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3488 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3489 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3490 set_vinfo_for_stmt (stmt, new_stmt_info);
3491 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3492 append_pattern_def_seq (stmt_vinfo, stmt);
3494 return tmp;
3498 /* Function vect_recog_mask_conversion_pattern
3500 Try to find statements which require boolean type
3501 conversion. Additional conversion statements are
3502 added to handle such cases. For example:
3504 bool m_1, m_2, m_3;
3505 int i_4, i_5;
3506 double d_6, d_7;
3507 char c_1, c_2, c_3;
3509 S1 m_1 = i_4 > i_5;
3510 S2 m_2 = d_6 < d_7;
3511 S3 m_3 = m_1 & m_2;
3512 S4 c_1 = m_3 ? c_2 : c_3;
3514 Will be transformed into:
3516 S1 m_1 = i_4 > i_5;
3517 S2 m_2 = d_6 < d_7;
3518 S3'' m_2' = (_Bool[bitsize=32])m_2
3519 S3' m_3' = m_1 & m_2';
3520 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3521 S4' c_1' = m_3'' ? c_2 : c_3; */
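/* For example, with 256-bit vectors the int comparison S1 yields a mask
   with eight 32-bit elements, the double comparison S2 one with four
   64-bit elements, and the char selection S4 needs 8-bit elements,
   hence the inserted conversions S3'' and S4''.  */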
3523 static gimple *
3524 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3525 tree *type_out)
3527 gimple *last_stmt = stmts->pop ();
3528 enum tree_code rhs_code;
3529 tree lhs, rhs1, rhs2, tmp, rhs1_type, rhs2_type, vectype1, vectype2;
3530 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3531 stmt_vec_info pattern_stmt_info;
3532 vec_info *vinfo = stmt_vinfo->vinfo;
3533 gimple *pattern_stmt;
3535 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3536 if (is_gimple_call (last_stmt)
3537 && gimple_call_internal_p (last_stmt)
3538 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3539 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3541 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3543 if (load)
3545 lhs = gimple_call_lhs (last_stmt);
3546 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3548 else
3550 rhs2 = gimple_call_arg (last_stmt, 3);
3551 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3554 rhs1 = gimple_call_arg (last_stmt, 2);
3555 rhs1_type = search_type_for_mask (rhs1, vinfo);
3556 if (!rhs1_type)
3557 return NULL;
3558 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3560 if (!vectype1 || !vectype2
3561 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3562 return NULL;
3564 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3566 if (load)
3568 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3569 pattern_stmt
3570 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3571 gimple_call_arg (last_stmt, 0),
3572 gimple_call_arg (last_stmt, 1),
3573 tmp);
3574 gimple_call_set_lhs (pattern_stmt, lhs);
3576 else
3577 pattern_stmt
3578 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3579 gimple_call_arg (last_stmt, 0),
3580 gimple_call_arg (last_stmt, 1),
3581 tmp,
3582 gimple_call_arg (last_stmt, 3));
3585 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3586 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3587 STMT_VINFO_DATA_REF (pattern_stmt_info)
3588 = STMT_VINFO_DATA_REF (stmt_vinfo);
3589 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3590 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3591 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3592 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3593 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3594 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3595 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3596 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3597 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3599 *type_out = vectype1;
3600 *type_in = vectype1;
3601 stmts->safe_push (last_stmt);
3602 if (dump_enabled_p ())
3603 dump_printf_loc (MSG_NOTE, vect_location,
3604 "vect_recog_mask_conversion_pattern: detected:\n");
3606 return pattern_stmt;
3609 if (!is_gimple_assign (last_stmt))
3610 return NULL;
3612 lhs = gimple_assign_lhs (last_stmt);
3613 rhs1 = gimple_assign_rhs1 (last_stmt);
3614 rhs_code = gimple_assign_rhs_code (last_stmt);
3616 /* Check for cond expression requiring mask conversion. */
3617 if (rhs_code == COND_EXPR)
3619 /* vect_recog_mixed_size_cond_pattern could apply.
3620 Do nothing then. */
3621 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3622 return NULL;
3624 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3626 if (TREE_CODE (rhs1) == SSA_NAME)
3628 rhs1_type = search_type_for_mask (rhs1, vinfo);
3629 if (!rhs1_type)
3630 return NULL;
3632 else
3633 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3635 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3637 if (!vectype1 || !vectype2
3638 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3639 return NULL;
3641 /* If rhs1 is a comparison we need to move it into a
3642 separate statement. */
3643 if (TREE_CODE (rhs1) != SSA_NAME)
3645 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3646 pattern_stmt = gimple_build_assign (tmp, rhs1);
3647 rhs1 = tmp;
3649 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3650 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3651 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
3652 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3655 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3657 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3658 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
3659 gimple_assign_rhs2 (last_stmt),
3660 gimple_assign_rhs3 (last_stmt));
3662 *type_out = vectype1;
3663 *type_in = vectype1;
3664 stmts->safe_push (last_stmt);
3665 if (dump_enabled_p ())
3666 dump_printf_loc (MSG_NOTE, vect_location,
3667 "vect_recog_mask_conversion_pattern: detected:\n");
3669 return pattern_stmt;
3672 /* Now check for binary boolean operations requiring conversion for
3673 one of the operands. */
3674 if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
3675 return NULL;
3677 if (rhs_code != BIT_IOR_EXPR
3678 && rhs_code != BIT_XOR_EXPR
3679 && rhs_code != BIT_AND_EXPR)
3680 return NULL;
3682 rhs2 = gimple_assign_rhs2 (last_stmt);
3684 rhs1_type = search_type_for_mask (rhs1, vinfo);
3685 rhs2_type = search_type_for_mask (rhs2, vinfo);
3687 if (!rhs1_type || !rhs2_type
3688 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
3689 return NULL;
3691 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
3693 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
3694 if (!vectype1)
3695 return NULL;
3696 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
3698 else
3700 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
3701 if (!vectype1)
3702 return NULL;
3703 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3706 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3707 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
3709 *type_out = vectype1;
3710 *type_in = vectype1;
3711 stmts->safe_push (last_stmt);
3712 if (dump_enabled_p ())
3713 dump_printf_loc (MSG_NOTE, vect_location,
3714 "vect_recog_mask_conversion_pattern: detected:\n");
3716 return pattern_stmt;
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  vec_info *vinfo = orig_stmt_info->vinfo;
  gimple *def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support has already been checked by
   PATTERN_RECOG_FUNC.  If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC,
   we check that it fits the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */
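/* For illustration, the skeleton of a recognition function obeying this
   contract might look as follows (a sketch only -- the name
   vect_recog_example_pattern is made up and is not part of the table of
   recognizers above):

     static gimple *
     vect_recog_example_pattern (vec<gimple *> *stmts, tree *type_in,
				 tree *type_out)
     {
       gimple *last_stmt = (*stmts)[stmts->length () - 1];

       if (!is_gimple_assign (last_stmt))
	 return NULL;

       ... inspect LAST_STMT; if it does not match, return NULL.
       Otherwise build PATTERN_STMT, push any additional stmts being
       replaced onto STMTS, set *TYPE_IN and *TYPE_OUT (scalar types,
       or vector types if target support was already verified), and
       return PATTERN_STMT ...
     }  */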
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple *> *stmts_to_replace)
{
  gimple *stmt = gsi_stmt (si), *pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple *next;

  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_BOOLEAN_TYPE_P (type_in)
      || VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "pattern recognized: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
	LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "additional pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt			      in_pattern_p  related_stmt  vec_stmt
	 S1: a_i = ....                    -            -             -
	 S2: a_2 = ..use(a_i)..            -            -             -
	 S3: a_1 = ..use(a_2)..            -            -             -
	 S4: a_0 = ..use(a_1)..            -            -             -
	 S5: ... = ..use(a_0)..            -            -             -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				      in_pattern_p  related_stmt  vec_stmt
	 S1: a_i = ....                    -            -             -
	 S2: a_2 = ..use(a_i)..            -            -             -
	 S3: a_1 = ..use(a_2)..            -            -             -
	 S4: a_0 = ..use(a_1)..            true         S6            -
	   '---> S6: a_new = ....          -            S4            -
	 S5: ... = ..use(a_0)..            -            -             -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				      in_pattern_p  related_stmt  vec_stmt
	 S1: a_i = ....                    -            -             -
	 S2: a_2 = ..use(a_i)..            -            -             -
	 S3: a_1 = ..use(a_2)..            -            -             -
       > VS6: va_new = ....                -            -             -
	 S4: a_0 = ..use(a_1)..            true         S6            VS6
	   '---> S6: a_new = ....          -            S4            VS6
       > VS5: ... = ..vuse(va_new)..       -            -             -
	 S5: ... = ..use(a_0)..            -            -             -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
       '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
       '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
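/* As a concrete source-level trigger (a sketch; widen-mult is just one
   of the recognizers in the table), a loop such as

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   is caught by vect_recog_widen_mult_pattern: the implicit promotions
   of a[i] and b[i] to int together with the int multiplication are
   replaced by a single WIDEN_MULT_EXPR pattern stmt, which many targets
   can implement with one widening multiply instruction.  */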
void
vect_pattern_recog (vec_info *vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  auto_vec<gimple *, 1> stmts_to_replace;
  gimple *stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_pattern_recog ===\n");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
	 functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[i];
	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	    {
	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
	      for (j = 0; j < NUM_PATTERNS; j++)
		{
		  vect_recog_func = vect_vect_recog_func_ptrs[j];
		  vect_pattern_recog_1 (vect_recog_func, si,
					&stmts_to_replace);
		}
	    }
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (si = bb_vinfo->region_begin;
	   gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
	{
	  /* Skip stmts already known not to be vectorizable.  */
	  if ((stmt = gsi_stmt (si))
	      && vinfo_for_stmt (stmt)
	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    {
	      vect_recog_func = vect_vect_recog_func_ptrs[j];
	      vect_pattern_recog_1 (vect_recog_func, si,
				    &stmts_to_replace);
	    }
	}
    }
}