Fix cygwin performance loss on linpack.
[official-gcc.git] / gcc / tree-vect-patterns.c
blob5bab1f57a18f14724b90608c64a915696fbcbbc9
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
45 /* Pattern recognition functions */
46 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
47 tree *);
48 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
49 tree *);
50 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
51 tree *);
52 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
53 tree *);
54 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
55 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
56 tree *);
57 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
58 tree *, tree *);
59 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
60 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
61 tree *, tree *);
62 static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
63 tree *, tree *);
65 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
66 tree *, tree *);
68 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
69 tree *, tree *);
70 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
71 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
72 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
73 vect_recog_widen_mult_pattern,
74 vect_recog_widen_sum_pattern,
75 vect_recog_dot_prod_pattern,
76 vect_recog_sad_pattern,
77 vect_recog_pow_pattern,
78 vect_recog_widen_shift_pattern,
79 vect_recog_over_widening_pattern,
80 vect_recog_rotate_pattern,
81 vect_recog_vector_vector_shift_pattern,
82 vect_recog_divmod_pattern,
83 vect_recog_mult_pattern,
84 vect_recog_mixed_size_cond_pattern,
85 vect_recog_bool_pattern,
86 vect_recog_mask_conversion_pattern};
88 static inline void
89 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
91 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
92 stmt);
95 static inline void
96 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
98 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
99 append_pattern_def_seq (stmt_info, stmt);
102 /* Check whether STMT2 is in the same loop or basic block as STMT1.
103 Which of the two applies depends on whether we're currently doing
104 loop-based or basic-block-based vectorization, as determined by
105 the vinfo_for_stmt for STMT1 (which must be defined).
107 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
108 to be defined as well. */
110 static bool
111 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
113 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
114 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
117 /* If the LHS of DEF_STMT has a single use, and that statement is
118 in the same loop or basic block, return it. */
120 static gimple *
121 vect_single_imm_use (gimple *def_stmt)
123 tree lhs = gimple_assign_lhs (def_stmt);
124 use_operand_p use_p;
125 gimple *use_stmt;
127 if (!single_imm_use (lhs, &use_p, &use_stmt))
128 return NULL;
130 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
131 return NULL;
133 return use_stmt;
136 /* Check whether NAME, an ssa-name used in USE_STMT,
137 is a result of a type promotion, such that:
138 DEF_STMT: NAME = NOP (name0)
139 If CHECK_SIGN is TRUE, check that either both types are signed or both are
140 unsigned. */
142 static bool
143 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
144 tree *orig_type, gimple **def_stmt, bool *promotion)
146 gimple *dummy_gimple;
147 stmt_vec_info stmt_vinfo;
148 tree type = TREE_TYPE (name);
149 tree oprnd0;
150 enum vect_def_type dt;
152 stmt_vinfo = vinfo_for_stmt (use_stmt);
153 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
154 return false;
156 if (dt != vect_internal_def
157 && dt != vect_external_def && dt != vect_constant_def)
158 return false;
160 if (!*def_stmt)
161 return false;
163 if (!is_gimple_assign (*def_stmt))
164 return false;
166 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
167 return false;
169 oprnd0 = gimple_assign_rhs1 (*def_stmt);
171 *orig_type = TREE_TYPE (oprnd0);
172 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
173 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
174 return false;
176 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
177 *promotion = true;
178 else
179 *promotion = false;
181 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
182 return false;
184 return true;
187 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
188 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
190 static tree
191 vect_recog_temp_ssa_var (tree type, gimple *stmt)
193 return make_temp_ssa_name (type, stmt, "patt");
196 /* Function vect_recog_dot_prod_pattern
198 Try to find the following pattern:
200 type x_t, y_t;
201 TYPE1 prod;
202 TYPE2 sum = init;
203 loop:
204 sum_0 = phi <init, sum_1>
205 S1 x_t = ...
206 S2 y_t = ...
207 S3 x_T = (TYPE1) x_t;
208 S4 y_T = (TYPE1) y_t;
209 S5 prod = x_T * y_T;
210 [S6 prod = (TYPE2) prod; #optional]
211 S7 sum_1 = prod + sum_0;
213 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
214 same size of 'TYPE1' or bigger. This is a special case of a reduction
215 computation.
217 Input:
219 * STMTS: Contains a stmt from which the pattern search begins. In the
220 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
221 will be detected.
223 Output:
225 * TYPE_IN: The type of the input arguments to the pattern.
227 * TYPE_OUT: The type of the output of this pattern.
229 * Return value: A new stmt that will be used to replace the sequence of
230 stmts that constitute the pattern. In this case it will be:
231 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
233 Note: The dot-prod idiom is a widening reduction pattern that is
234 vectorized without preserving all the intermediate results. It
235 produces only N/2 (widened) results (by summing up pairs of
236 intermediate results) rather than all N results. Therefore, we
237 cannot allow this pattern when we want to get all the results and in
238 the correct order (as is the case when this computation is in an
239 inner-loop nested in an outer-loop that us being vectorized). */
241 static gimple *
242 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
243 tree *type_out)
245 gimple *stmt, *last_stmt = (*stmts)[0];
246 tree oprnd0, oprnd1;
247 tree oprnd00, oprnd01;
248 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
249 tree type, half_type;
250 gimple *pattern_stmt;
251 tree prod_type;
252 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
253 struct loop *loop;
254 tree var;
255 bool promotion;
257 if (!loop_info)
258 return NULL;
260 loop = LOOP_VINFO_LOOP (loop_info);
262 /* We don't allow changing the order of the computation in the inner-loop
263 when doing outer-loop vectorization. */
264 if (loop && nested_in_vect_loop_p (loop, last_stmt))
265 return NULL;
267 if (!is_gimple_assign (last_stmt))
268 return NULL;
270 type = gimple_expr_type (last_stmt);
272 /* Look for the following pattern
273 DX = (TYPE1) X;
274 DY = (TYPE1) Y;
275 DPROD = DX * DY;
276 DDPROD = (TYPE2) DPROD;
277 sum_1 = DDPROD + sum_0;
278 In which
279 - DX is double the size of X
280 - DY is double the size of Y
281 - DX, DY, DPROD all have the same type
282 - sum is the same size of DPROD or bigger
283 - sum has been recognized as a reduction variable.
285 This is equivalent to:
286 DPROD = X w* Y; #widen mult
287 sum_1 = DPROD w+ sum_0; #widen summation
289 DPROD = X w* Y; #widen mult
290 sum_1 = DPROD + sum_0; #summation
293 /* Starting from LAST_STMT, follow the defs of its uses in search
294 of the above pattern. */
296 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
297 return NULL;
299 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
301 /* Has been detected as widening-summation? */
303 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
304 type = gimple_expr_type (stmt);
305 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
306 return NULL;
307 oprnd0 = gimple_assign_rhs1 (stmt);
308 oprnd1 = gimple_assign_rhs2 (stmt);
309 half_type = TREE_TYPE (oprnd0);
311 else
313 gimple *def_stmt;
315 oprnd0 = gimple_assign_rhs1 (last_stmt);
316 oprnd1 = gimple_assign_rhs2 (last_stmt);
317 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
318 || !types_compatible_p (TREE_TYPE (oprnd1), type))
319 return NULL;
320 stmt = last_stmt;
322 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
323 &promotion)
324 && promotion)
326 stmt = def_stmt;
327 oprnd0 = gimple_assign_rhs1 (stmt);
329 else
330 half_type = type;
333 /* So far so good. Since last_stmt was detected as a (summation) reduction,
334 we know that oprnd1 is the reduction variable (defined by a loop-header
335 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
336 Left to check that oprnd0 is defined by a (widen_)mult_expr */
337 if (TREE_CODE (oprnd0) != SSA_NAME)
338 return NULL;
340 prod_type = half_type;
341 stmt = SSA_NAME_DEF_STMT (oprnd0);
343 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
344 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
345 return NULL;
347 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
348 inside the loop (in case we are analyzing an outer-loop). */
349 if (!is_gimple_assign (stmt))
350 return NULL;
351 stmt_vinfo = vinfo_for_stmt (stmt);
352 gcc_assert (stmt_vinfo);
353 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
354 return NULL;
355 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
356 return NULL;
357 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
359 /* Has been detected as a widening multiplication? */
361 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
362 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
363 return NULL;
364 stmt_vinfo = vinfo_for_stmt (stmt);
365 gcc_assert (stmt_vinfo);
366 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
367 oprnd00 = gimple_assign_rhs1 (stmt);
368 oprnd01 = gimple_assign_rhs2 (stmt);
369 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
370 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
372 else
374 tree half_type0, half_type1;
375 gimple *def_stmt;
376 tree oprnd0, oprnd1;
378 oprnd0 = gimple_assign_rhs1 (stmt);
379 oprnd1 = gimple_assign_rhs2 (stmt);
380 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
381 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
382 return NULL;
383 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
384 &promotion)
385 || !promotion)
386 return NULL;
387 oprnd00 = gimple_assign_rhs1 (def_stmt);
388 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
389 &promotion)
390 || !promotion)
391 return NULL;
392 oprnd01 = gimple_assign_rhs1 (def_stmt);
393 if (!types_compatible_p (half_type0, half_type1))
394 return NULL;
395 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
396 return NULL;
399 half_type = TREE_TYPE (oprnd00);
400 *type_in = half_type;
401 *type_out = type;
403 /* Pattern detected. Create a stmt to be used to replace the pattern: */
404 var = vect_recog_temp_ssa_var (type, NULL);
405 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
406 oprnd00, oprnd01, oprnd1);
408 if (dump_enabled_p ())
410 dump_printf_loc (MSG_NOTE, vect_location,
411 "vect_recog_dot_prod_pattern: detected: ");
412 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
413 dump_printf (MSG_NOTE, "\n");
416 return pattern_stmt;
420 /* Function vect_recog_sad_pattern
422 Try to find the following Sum of Absolute Difference (SAD) pattern:
424 type x_t, y_t;
425 signed TYPE1 diff, abs_diff;
426 TYPE2 sum = init;
427 loop:
428 sum_0 = phi <init, sum_1>
429 S1 x_t = ...
430 S2 y_t = ...
431 S3 x_T = (TYPE1) x_t;
432 S4 y_T = (TYPE1) y_t;
433 S5 diff = x_T - y_T;
434 S6 abs_diff = ABS_EXPR <diff>;
435 [S7 abs_diff = (TYPE2) abs_diff; #optional]
436 S8 sum_1 = abs_diff + sum_0;
438 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
439 same size of 'TYPE1' or bigger. This is a special case of a reduction
440 computation.
442 Input:
444 * STMTS: Contains a stmt from which the pattern search begins. In the
445 example, when this function is called with S8, the pattern
446 {S3,S4,S5,S6,S7,S8} will be detected.
448 Output:
450 * TYPE_IN: The type of the input arguments to the pattern.
452 * TYPE_OUT: The type of the output of this pattern.
454 * Return value: A new stmt that will be used to replace the sequence of
455 stmts that constitute the pattern. In this case it will be:
456 SAD_EXPR <x_t, y_t, sum_0>
459 static gimple *
460 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
461 tree *type_out)
463 gimple *last_stmt = (*stmts)[0];
464 tree sad_oprnd0, sad_oprnd1;
465 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
466 tree half_type;
467 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
468 struct loop *loop;
469 bool promotion;
471 if (!loop_info)
472 return NULL;
474 loop = LOOP_VINFO_LOOP (loop_info);
476 /* We don't allow changing the order of the computation in the inner-loop
477 when doing outer-loop vectorization. */
478 if (loop && nested_in_vect_loop_p (loop, last_stmt))
479 return NULL;
481 if (!is_gimple_assign (last_stmt))
482 return NULL;
484 tree sum_type = gimple_expr_type (last_stmt);
486 /* Look for the following pattern
487 DX = (TYPE1) X;
488 DY = (TYPE1) Y;
489 DDIFF = DX - DY;
490 DAD = ABS_EXPR <DDIFF>;
491 DDPROD = (TYPE2) DPROD;
492 sum_1 = DAD + sum_0;
493 In which
494 - DX is at least double the size of X
495 - DY is at least double the size of Y
496 - DX, DY, DDIFF, DAD all have the same type
497 - sum is the same size of DAD or bigger
498 - sum has been recognized as a reduction variable.
500 This is equivalent to:
501 DDIFF = X w- Y; #widen sub
502 DAD = ABS_EXPR <DDIFF>;
503 sum_1 = DAD w+ sum_0; #widen summation
505 DDIFF = X w- Y; #widen sub
506 DAD = ABS_EXPR <DDIFF>;
507 sum_1 = DAD + sum_0; #summation
510 /* Starting from LAST_STMT, follow the defs of its uses in search
511 of the above pattern. */
513 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
514 return NULL;
516 tree plus_oprnd0, plus_oprnd1;
518 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
520 /* Has been detected as widening-summation? */
522 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
523 sum_type = gimple_expr_type (stmt);
524 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
525 return NULL;
526 plus_oprnd0 = gimple_assign_rhs1 (stmt);
527 plus_oprnd1 = gimple_assign_rhs2 (stmt);
528 half_type = TREE_TYPE (plus_oprnd0);
530 else
532 gimple *def_stmt;
534 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
535 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
536 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
537 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
538 return NULL;
540 /* The type conversion could be promotion, demotion,
541 or just signed -> unsigned. */
542 if (type_conversion_p (plus_oprnd0, last_stmt, false,
543 &half_type, &def_stmt, &promotion))
544 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
545 else
546 half_type = sum_type;
549 /* So far so good. Since last_stmt was detected as a (summation) reduction,
550 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
551 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
552 Then check that plus_oprnd0 is defined by an abs_expr. */
554 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
555 return NULL;
557 tree abs_type = half_type;
558 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
560 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
561 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
562 return NULL;
564 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
565 inside the loop (in case we are analyzing an outer-loop). */
566 if (!is_gimple_assign (abs_stmt))
567 return NULL;
569 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
570 gcc_assert (abs_stmt_vinfo);
571 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
572 return NULL;
573 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
574 return NULL;
576 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
577 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
578 return NULL;
579 if (TYPE_UNSIGNED (abs_type))
580 return NULL;
582 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
584 if (TREE_CODE (abs_oprnd) != SSA_NAME)
585 return NULL;
587 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
589 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
590 if (!gimple_bb (diff_stmt)
591 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
592 return NULL;
594 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
595 inside the loop (in case we are analyzing an outer-loop). */
596 if (!is_gimple_assign (diff_stmt))
597 return NULL;
599 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
600 gcc_assert (diff_stmt_vinfo);
601 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
602 return NULL;
603 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
604 return NULL;
606 tree half_type0, half_type1;
607 gimple *def_stmt;
609 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
610 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
612 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
613 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
614 return NULL;
615 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
616 &half_type0, &def_stmt, &promotion)
617 || !promotion)
618 return NULL;
619 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
621 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
622 &half_type1, &def_stmt, &promotion)
623 || !promotion)
624 return NULL;
625 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
627 if (!types_compatible_p (half_type0, half_type1))
628 return NULL;
629 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
630 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
631 return NULL;
633 *type_in = TREE_TYPE (sad_oprnd0);
634 *type_out = sum_type;
636 /* Pattern detected. Create a stmt to be used to replace the pattern: */
637 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
638 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
639 sad_oprnd1, plus_oprnd1);
641 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "vect_recog_sad_pattern: detected: ");
645 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
646 dump_printf (MSG_NOTE, "\n");
649 return pattern_stmt;
653 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
654 and LSHIFT_EXPR.
656 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
657 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
659 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
660 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
661 that satisfies the above restrictions, we can perform a widening opeartion
662 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
663 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
665 static bool
666 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
667 tree const_oprnd, tree *oprnd,
668 gimple **wstmt, tree type,
669 tree *half_type, gimple *def_stmt)
671 tree new_type, new_oprnd;
673 if (code != MULT_EXPR && code != LSHIFT_EXPR)
674 return false;
676 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
677 || (code == LSHIFT_EXPR
678 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
679 != 1))
680 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
682 /* CONST_OPRND is a constant of HALF_TYPE. */
683 *oprnd = gimple_assign_rhs1 (def_stmt);
684 return true;
687 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
688 return false;
690 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
691 return false;
693 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
694 a type 2 times bigger than HALF_TYPE. */
695 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
696 TYPE_UNSIGNED (type));
697 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
698 || (code == LSHIFT_EXPR
699 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
700 return false;
702 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
703 *oprnd = gimple_assign_rhs1 (def_stmt);
704 new_oprnd = make_ssa_name (new_type);
705 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
706 *oprnd = new_oprnd;
708 *half_type = new_type;
709 return true;
713 /* Function vect_recog_widen_mult_pattern
715 Try to find the following pattern:
717 type1 a_t;
718 type2 b_t;
719 TYPE a_T, b_T, prod_T;
721 S1 a_t = ;
722 S2 b_t = ;
723 S3 a_T = (TYPE) a_t;
724 S4 b_T = (TYPE) b_t;
725 S5 prod_T = a_T * b_T;
727 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
729 Also detect unsigned cases:
731 unsigned type1 a_t;
732 unsigned type2 b_t;
733 unsigned TYPE u_prod_T;
734 TYPE a_T, b_T, prod_T;
736 S1 a_t = ;
737 S2 b_t = ;
738 S3 a_T = (TYPE) a_t;
739 S4 b_T = (TYPE) b_t;
740 S5 prod_T = a_T * b_T;
741 S6 u_prod_T = (unsigned TYPE) prod_T;
743 and multiplication by constants:
745 type a_t;
746 TYPE a_T, prod_T;
748 S1 a_t = ;
749 S3 a_T = (TYPE) a_t;
750 S5 prod_T = a_T * CONST;
752 A special case of multiplication by constants is when 'TYPE' is 4 times
753 bigger than 'type', but CONST fits an intermediate type 2 times smaller
754 than 'TYPE'. In that case we create an additional pattern stmt for S3
755 to create a variable of the intermediate type, and perform widen-mult
756 on the intermediate type as well:
758 type a_t;
759 interm_type a_it;
760 TYPE a_T, prod_T, prod_T';
762 S1 a_t = ;
763 S3 a_T = (TYPE) a_t;
764 '--> a_it = (interm_type) a_t;
765 S5 prod_T = a_T * CONST;
766 '--> prod_T' = a_it w* CONST;
768 Input/Output:
770 * STMTS: Contains a stmt from which the pattern search begins. In the
771 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
772 is detected. In case of unsigned widen-mult, the original stmt (S5) is
773 replaced with S6 in STMTS. In case of multiplication by a constant
774 of an intermediate type (the last case above), STMTS also contains S3
775 (inserted before S5).
777 Output:
779 * TYPE_IN: The type of the input arguments to the pattern.
781 * TYPE_OUT: The type of the output of this pattern.
783 * Return value: A new stmt that will be used to replace the sequence of
784 stmts that constitute the pattern. In this case it will be:
785 WIDEN_MULT <a_t, b_t>
786 If the result of WIDEN_MULT needs to be converted to a larger type, the
787 returned stmt will be this type conversion stmt.
790 static gimple *
791 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
792 tree *type_in, tree *type_out)
794 gimple *last_stmt = stmts->pop ();
795 gimple *def_stmt0, *def_stmt1;
796 tree oprnd0, oprnd1;
797 tree type, half_type0, half_type1;
798 gimple *new_stmt = NULL, *pattern_stmt = NULL;
799 tree vectype, vecitype;
800 tree var;
801 enum tree_code dummy_code;
802 int dummy_int;
803 vec<tree> dummy_vec;
804 bool op1_ok;
805 bool promotion;
807 if (!is_gimple_assign (last_stmt))
808 return NULL;
810 type = gimple_expr_type (last_stmt);
812 /* Starting from LAST_STMT, follow the defs of its uses in search
813 of the above pattern. */
815 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
816 return NULL;
818 oprnd0 = gimple_assign_rhs1 (last_stmt);
819 oprnd1 = gimple_assign_rhs2 (last_stmt);
820 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
821 || !types_compatible_p (TREE_TYPE (oprnd1), type))
822 return NULL;
824 /* Check argument 0. */
825 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
826 &promotion)
827 || !promotion)
828 return NULL;
829 /* Check argument 1. */
830 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
831 &def_stmt1, &promotion);
833 if (op1_ok && promotion)
835 oprnd0 = gimple_assign_rhs1 (def_stmt0);
836 oprnd1 = gimple_assign_rhs1 (def_stmt1);
838 else
840 if (TREE_CODE (oprnd1) == INTEGER_CST
841 && TREE_CODE (half_type0) == INTEGER_TYPE
842 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
843 &oprnd0, &new_stmt, type,
844 &half_type0, def_stmt0))
846 half_type1 = half_type0;
847 oprnd1 = fold_convert (half_type1, oprnd1);
849 else
850 return NULL;
853 /* If the two arguments have different sizes, convert the one with
854 the smaller type into the larger type. */
855 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
857 /* If we already used up the single-stmt slot give up. */
858 if (new_stmt)
859 return NULL;
861 tree* oprnd = NULL;
862 gimple *def_stmt = NULL;
864 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
866 def_stmt = def_stmt0;
867 half_type0 = half_type1;
868 oprnd = &oprnd0;
870 else
872 def_stmt = def_stmt1;
873 half_type1 = half_type0;
874 oprnd = &oprnd1;
877 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
878 tree new_oprnd = make_ssa_name (half_type0);
879 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
880 *oprnd = new_oprnd;
883 /* Handle unsigned case. Look for
884 S6 u_prod_T = (unsigned TYPE) prod_T;
885 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
886 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
888 gimple *use_stmt;
889 tree use_lhs;
890 tree use_type;
892 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
893 return NULL;
895 use_stmt = vect_single_imm_use (last_stmt);
896 if (!use_stmt || !is_gimple_assign (use_stmt)
897 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
898 return NULL;
900 use_lhs = gimple_assign_lhs (use_stmt);
901 use_type = TREE_TYPE (use_lhs);
902 if (!INTEGRAL_TYPE_P (use_type)
903 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
904 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
905 return NULL;
907 type = use_type;
908 last_stmt = use_stmt;
911 if (!types_compatible_p (half_type0, half_type1))
912 return NULL;
914 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
915 to get an intermediate result of type ITYPE. In this case we need
916 to build a statement to convert this intermediate result to type TYPE. */
917 tree itype = type;
918 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
919 itype = build_nonstandard_integer_type
920 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
921 TYPE_UNSIGNED (type));
923 /* Pattern detected. */
924 if (dump_enabled_p ())
925 dump_printf_loc (MSG_NOTE, vect_location,
926 "vect_recog_widen_mult_pattern: detected:\n");
928 /* Check target support */
929 vectype = get_vectype_for_scalar_type (half_type0);
930 vecitype = get_vectype_for_scalar_type (itype);
931 if (!vectype
932 || !vecitype
933 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
934 vecitype, vectype,
935 &dummy_code, &dummy_code,
936 &dummy_int, &dummy_vec))
937 return NULL;
939 *type_in = vectype;
940 *type_out = get_vectype_for_scalar_type (type);
942 /* Pattern supported. Create a stmt to be used to replace the pattern: */
943 var = vect_recog_temp_ssa_var (itype, NULL);
944 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
946 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
947 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
949 /* If the original two operands have different sizes, we may need to convert
950 the smaller one into the larget type. If this is the case, at this point
951 the new stmt is already built. */
952 if (new_stmt)
954 append_pattern_def_seq (stmt_vinfo, new_stmt);
955 stmt_vec_info new_stmt_info
956 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
957 set_vinfo_for_stmt (new_stmt, new_stmt_info);
958 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
961 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
962 the result of the widen-mult operation into type TYPE. */
963 if (itype != type)
965 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
966 stmt_vec_info pattern_stmt_info
967 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
968 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
969 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
970 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
971 NOP_EXPR,
972 gimple_assign_lhs (pattern_stmt));
975 if (dump_enabled_p ())
976 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
978 stmts->safe_push (last_stmt);
979 return pattern_stmt;
983 /* Function vect_recog_pow_pattern
985 Try to find the following pattern:
987 x = POW (y, N);
989 with POW being one of pow, powf, powi, powif and N being
990 either 2 or 0.5.
992 Input:
994 * LAST_STMT: A stmt from which the pattern search begins.
996 Output:
998 * TYPE_IN: The type of the input arguments to the pattern.
1000 * TYPE_OUT: The type of the output of this pattern.
1002 * Return value: A new stmt that will be used to replace the sequence of
1003 stmts that constitute the pattern. In this case it will be:
1004 x = x * x
1006 x = sqrt (x)
1009 static gimple *
1010 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1011 tree *type_out)
1013 gimple *last_stmt = (*stmts)[0];
1014 tree base, exp = NULL;
1015 gimple *stmt;
1016 tree var;
1018 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1019 return NULL;
1021 switch (gimple_call_combined_fn (last_stmt))
1023 CASE_CFN_POW:
1024 CASE_CFN_POWI:
1025 base = gimple_call_arg (last_stmt, 0);
1026 exp = gimple_call_arg (last_stmt, 1);
1027 if (TREE_CODE (exp) != REAL_CST
1028 && TREE_CODE (exp) != INTEGER_CST)
1029 return NULL;
1030 break;
1032 default:
1033 return NULL;
1036 /* We now have a pow or powi builtin function call with a constant
1037 exponent. */
1039 *type_out = NULL_TREE;
1041 /* Catch squaring. */
1042 if ((tree_fits_shwi_p (exp)
1043 && tree_to_shwi (exp) == 2)
1044 || (TREE_CODE (exp) == REAL_CST
1045 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1047 *type_in = TREE_TYPE (base);
1049 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1050 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1051 return stmt;
1054 /* Catch square root. */
1055 if (TREE_CODE (exp) == REAL_CST
1056 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1058 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1059 if (*type_in && direct_internal_fn_supported_p (IFN_SQRT, *type_in))
1061 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1062 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1063 gimple_call_set_lhs (stmt, var);
1064 return stmt;
1068 return NULL;
1072 /* Function vect_recog_widen_sum_pattern
1074 Try to find the following pattern:
1076 type x_t;
1077 TYPE x_T, sum = init;
1078 loop:
1079 sum_0 = phi <init, sum_1>
1080 S1 x_t = *p;
1081 S2 x_T = (TYPE) x_t;
1082 S3 sum_1 = x_T + sum_0;
1084 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1085 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1086 a special case of a reduction computation.
1088 Input:
1090 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1091 when this function is called with S3, the pattern {S2,S3} will be detected.
1093 Output:
1095 * TYPE_IN: The type of the input arguments to the pattern.
1097 * TYPE_OUT: The type of the output of this pattern.
1099 * Return value: A new stmt that will be used to replace the sequence of
1100 stmts that constitute the pattern. In this case it will be:
1101 WIDEN_SUM <x_t, sum_0>
1103 Note: The widening-sum idiom is a widening reduction pattern that is
1104 vectorized without preserving all the intermediate results. It
1105 produces only N/2 (widened) results (by summing up pairs of
1106 intermediate results) rather than all N results. Therefore, we
1107 cannot allow this pattern when we want to get all the results and in
1108 the correct order (as is the case when this computation is in an
1109 inner-loop nested in an outer-loop that us being vectorized). */
1111 static gimple *
1112 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1113 tree *type_out)
1115 gimple *stmt, *last_stmt = (*stmts)[0];
1116 tree oprnd0, oprnd1;
1117 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1118 tree type, half_type;
1119 gimple *pattern_stmt;
1120 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1121 struct loop *loop;
1122 tree var;
1123 bool promotion;
1125 if (!loop_info)
1126 return NULL;
1128 loop = LOOP_VINFO_LOOP (loop_info);
1130 /* We don't allow changing the order of the computation in the inner-loop
1131 when doing outer-loop vectorization. */
1132 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1133 return NULL;
1135 if (!is_gimple_assign (last_stmt))
1136 return NULL;
1138 type = gimple_expr_type (last_stmt);
1140 /* Look for the following pattern
1141 DX = (TYPE) X;
1142 sum_1 = DX + sum_0;
1143 In which DX is at least double the size of X, and sum_1 has been
1144 recognized as a reduction variable.
1147 /* Starting from LAST_STMT, follow the defs of its uses in search
1148 of the above pattern. */
1150 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1151 return NULL;
1153 oprnd0 = gimple_assign_rhs1 (last_stmt);
1154 oprnd1 = gimple_assign_rhs2 (last_stmt);
1155 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1156 || !types_compatible_p (TREE_TYPE (oprnd1), type))
1157 return NULL;
1159 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1160 we know that oprnd1 is the reduction variable (defined by a loop-header
1161 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1162 Left to check that oprnd0 is defined by a cast from type 'type' to type
1163 'TYPE'. */
1165 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1166 &promotion)
1167 || !promotion)
1168 return NULL;
1170 oprnd0 = gimple_assign_rhs1 (stmt);
1171 *type_in = half_type;
1172 *type_out = type;
1174 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1175 var = vect_recog_temp_ssa_var (type, NULL);
1176 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1178 if (dump_enabled_p ())
1180 dump_printf_loc (MSG_NOTE, vect_location,
1181 "vect_recog_widen_sum_pattern: detected: ");
1182 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1183 dump_printf (MSG_NOTE, "\n");
1186 return pattern_stmt;
1190 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1192 Input:
1193 STMT - a statement to check.
1194 DEF - we support operations with two operands, one of which is constant.
1195 The other operand can be defined by a demotion operation, or by a
1196 previous statement in a sequence of over-promoted operations. In the
1197 later case DEF is used to replace that operand. (It is defined by a
1198 pattern statement we created for the previous statement in the
1199 sequence).
1201 Input/output:
1202 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1203 NULL, it's the type of DEF.
1204 STMTS - additional pattern statements. If a pattern statement (type
1205 conversion) is created in this function, its original statement is
1206 added to STMTS.
1208 Output:
1209 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1210 operands to use in the new pattern statement for STMT (will be created
1211 in vect_recog_over_widening_pattern ()).
1212 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1213 statements for STMT: the first one is a type promotion and the second
1214 one is the operation itself. We return the type promotion statement
1215 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1216 the second pattern statement. */
1218 static bool
1219 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1220 tree *op0, tree *op1, gimple **new_def_stmt,
1221 vec<gimple *> *stmts)
1223 enum tree_code code;
1224 tree const_oprnd, oprnd;
1225 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1226 gimple *def_stmt, *new_stmt;
1227 bool first = false;
1228 bool promotion;
1230 *op0 = NULL_TREE;
1231 *op1 = NULL_TREE;
1232 *new_def_stmt = NULL;
1234 if (!is_gimple_assign (stmt))
1235 return false;
1237 code = gimple_assign_rhs_code (stmt);
1238 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1239 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1240 return false;
1242 oprnd = gimple_assign_rhs1 (stmt);
1243 const_oprnd = gimple_assign_rhs2 (stmt);
1244 type = gimple_expr_type (stmt);
1246 if (TREE_CODE (oprnd) != SSA_NAME
1247 || TREE_CODE (const_oprnd) != INTEGER_CST)
1248 return false;
1250 /* If oprnd has other uses besides that in stmt we cannot mark it
1251 as being part of a pattern only. */
1252 if (!has_single_use (oprnd))
1253 return false;
1255 /* If we are in the middle of a sequence, we use DEF from a previous
1256 statement. Otherwise, OPRND has to be a result of type promotion. */
1257 if (*new_type)
1259 half_type = *new_type;
1260 oprnd = def;
1262 else
1264 first = true;
1265 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1266 &promotion)
1267 || !promotion
1268 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1269 return false;
1272 /* Can we perform the operation on a smaller type? */
1273 switch (code)
1275 case BIT_IOR_EXPR:
1276 case BIT_XOR_EXPR:
1277 case BIT_AND_EXPR:
1278 if (!int_fits_type_p (const_oprnd, half_type))
1280 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1281 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1282 return false;
1284 interm_type = build_nonstandard_integer_type (
1285 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1286 if (!int_fits_type_p (const_oprnd, interm_type))
1287 return false;
1290 break;
1292 case LSHIFT_EXPR:
1293 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1294 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1295 return false;
1297 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1298 (e.g., if the original value was char, the shift amount is at most 8
1299 if we want to use short). */
1300 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1301 return false;
1303 interm_type = build_nonstandard_integer_type (
1304 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1306 if (!vect_supportable_shift (code, interm_type))
1307 return false;
1309 break;
1311 case RSHIFT_EXPR:
1312 if (vect_supportable_shift (code, half_type))
1313 break;
1315 /* Try intermediate type - HALF_TYPE is not supported. */
1316 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1317 return false;
1319 interm_type = build_nonstandard_integer_type (
1320 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1322 if (!vect_supportable_shift (code, interm_type))
1323 return false;
1325 break;
1327 default:
1328 gcc_unreachable ();
1331 /* There are four possible cases:
1332 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1333 the first statement in the sequence)
1334 a. The original, HALF_TYPE, is not enough - we replace the promotion
1335 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1336 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1337 promotion.
1338 2. OPRND is defined by a pattern statement we created.
1339 a. Its type is not sufficient for the operation, we create a new stmt:
1340 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1341 this statement in NEW_DEF_STMT, and it is later put in
1342 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1343 b. OPRND is good to use in the new statement. */
1344 if (first)
1346 if (interm_type)
1348 /* Replace the original type conversion HALF_TYPE->TYPE with
1349 HALF_TYPE->INTERM_TYPE. */
1350 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1352 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1353 /* Check if the already created pattern stmt is what we need. */
1354 if (!is_gimple_assign (new_stmt)
1355 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1356 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1357 return false;
1359 stmts->safe_push (def_stmt);
1360 oprnd = gimple_assign_lhs (new_stmt);
1362 else
1364 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1365 oprnd = gimple_assign_rhs1 (def_stmt);
1366 new_oprnd = make_ssa_name (interm_type);
1367 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1368 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1369 stmts->safe_push (def_stmt);
1370 oprnd = new_oprnd;
1373 else
1375 /* Retrieve the operand before the type promotion. */
1376 oprnd = gimple_assign_rhs1 (def_stmt);
1379 else
1381 if (interm_type)
1383 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1384 new_oprnd = make_ssa_name (interm_type);
1385 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1386 oprnd = new_oprnd;
1387 *new_def_stmt = new_stmt;
1390 /* Otherwise, OPRND is already set. */
1393 if (interm_type)
1394 *new_type = interm_type;
1395 else
1396 *new_type = half_type;
1398 *op0 = oprnd;
1399 *op1 = fold_convert (*new_type, const_oprnd);
1401 return true;
1405 /* Try to find a statement or a sequence of statements that can be performed
1406 on a smaller type:
1408 type x_t;
1409 TYPE x_T, res0_T, res1_T;
1410 loop:
1411 S1 x_t = *p;
1412 S2 x_T = (TYPE) x_t;
1413 S3 res0_T = op (x_T, C0);
1414 S4 res1_T = op (res0_T, C1);
1415 S5 ... = () res1_T; - type demotion
1417 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1418 constants.
1419 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1420 be 'type' or some intermediate type. For now, we expect S5 to be a type
1421 demotion operation. We also check that S3 and S4 have only one use. */
1423 static gimple *
1424 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1425 tree *type_in, tree *type_out)
1427 gimple *stmt = stmts->pop ();
1428 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1429 *use_stmt = NULL;
1430 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1431 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1432 bool first;
1433 tree type = NULL;
1435 first = true;
1436 while (1)
1438 if (!vinfo_for_stmt (stmt)
1439 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1440 return NULL;
1442 new_def_stmt = NULL;
1443 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1444 &op0, &op1, &new_def_stmt,
1445 stmts))
1447 if (first)
1448 return NULL;
1449 else
1450 break;
1453 /* STMT can be performed on a smaller type. Check its uses. */
1454 use_stmt = vect_single_imm_use (stmt);
1455 if (!use_stmt || !is_gimple_assign (use_stmt))
1456 return NULL;
1458 /* Create pattern statement for STMT. */
1459 vectype = get_vectype_for_scalar_type (new_type);
1460 if (!vectype)
1461 return NULL;
1463 /* We want to collect all the statements for which we create pattern
1464 statetments, except for the case when the last statement in the
1465 sequence doesn't have a corresponding pattern statement. In such
1466 case we associate the last pattern statement with the last statement
1467 in the sequence. Therefore, we only add the original statement to
1468 the list if we know that it is not the last. */
1469 if (prev_stmt)
1470 stmts->safe_push (prev_stmt);
1472 var = vect_recog_temp_ssa_var (new_type, NULL);
1473 pattern_stmt
1474 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1475 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1476 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1478 if (dump_enabled_p ())
1480 dump_printf_loc (MSG_NOTE, vect_location,
1481 "created pattern stmt: ");
1482 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1483 dump_printf (MSG_NOTE, "\n");
1486 type = gimple_expr_type (stmt);
1487 prev_stmt = stmt;
1488 stmt = use_stmt;
1490 first = false;
1493 /* We got a sequence. We expect it to end with a type demotion operation.
1494 Otherwise, we quit (for now). There are three possible cases: the
1495 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1496 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1497 NEW_TYPE differs (we create a new conversion statement). */
1498 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1500 use_lhs = gimple_assign_lhs (use_stmt);
1501 use_type = TREE_TYPE (use_lhs);
1502 /* Support only type demotion or signedess change. */
1503 if (!INTEGRAL_TYPE_P (use_type)
1504 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1505 return NULL;
1507 /* Check that NEW_TYPE is not bigger than the conversion result. */
1508 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1509 return NULL;
1511 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1512 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1514 /* Create NEW_TYPE->USE_TYPE conversion. */
1515 new_oprnd = make_ssa_name (use_type);
1516 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1517 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1519 *type_in = get_vectype_for_scalar_type (new_type);
1520 *type_out = get_vectype_for_scalar_type (use_type);
1522 /* We created a pattern statement for the last statement in the
1523 sequence, so we don't need to associate it with the pattern
1524 statement created for PREV_STMT. Therefore, we add PREV_STMT
1525 to the list in order to mark it later in vect_pattern_recog_1. */
1526 if (prev_stmt)
1527 stmts->safe_push (prev_stmt);
1529 else
1531 if (prev_stmt)
1532 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1533 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1535 *type_in = vectype;
1536 *type_out = NULL_TREE;
1539 stmts->safe_push (use_stmt);
1541 else
1542 /* TODO: support general case, create a conversion to the correct type. */
1543 return NULL;
1545 /* Pattern detected. */
1546 if (dump_enabled_p ())
1548 dump_printf_loc (MSG_NOTE, vect_location,
1549 "vect_recog_over_widening_pattern: detected: ");
1550 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1551 dump_printf (MSG_NOTE, "\n");
1554 return pattern_stmt;
1557 /* Detect widening shift pattern:
1559 type a_t;
1560 TYPE a_T, res_T;
1562 S1 a_t = ;
1563 S2 a_T = (TYPE) a_t;
1564 S3 res_T = a_T << CONST;
1566 where type 'TYPE' is at least double the size of type 'type'.
1568 Also detect cases where the shift result is immediately converted
1569 to another type 'result_type' that is no larger in size than 'TYPE'.
1570 In those cases we perform a widen-shift that directly results in
1571 'result_type', to avoid a possible over-widening situation:
1573 type a_t;
1574 TYPE a_T, res_T;
1575 result_type res_result;
1577 S1 a_t = ;
1578 S2 a_T = (TYPE) a_t;
1579 S3 res_T = a_T << CONST;
1580 S4 res_result = (result_type) res_T;
1581 '--> res_result' = a_t w<< CONST;
1583 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1584 create an additional pattern stmt for S2 to create a variable of an
1585 intermediate type, and perform widen-shift on the intermediate type:
1587 type a_t;
1588 interm_type a_it;
1589 TYPE a_T, res_T, res_T';
1591 S1 a_t = ;
1592 S2 a_T = (TYPE) a_t;
1593 '--> a_it = (interm_type) a_t;
1594 S3 res_T = a_T << CONST;
1595 '--> res_T' = a_it <<* CONST;
1597 Input/Output:
1599 * STMTS: Contains a stmt from which the pattern search begins.
1600 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1601 in STMTS. When an intermediate type is used and a pattern statement is
1602 created for S2, we also put S2 here (before S3).
1604 Output:
1606 * TYPE_IN: The type of the input arguments to the pattern.
1608 * TYPE_OUT: The type of the output of this pattern.
1610 * Return value: A new stmt that will be used to replace the sequence of
1611 stmts that constitute the pattern. In this case it will be:
1612 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1614 static gimple *
1615 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1616 tree *type_in, tree *type_out)
1618 gimple *last_stmt = stmts->pop ();
1619 gimple *def_stmt0;
1620 tree oprnd0, oprnd1;
1621 tree type, half_type0;
1622 gimple *pattern_stmt;
1623 tree vectype, vectype_out = NULL_TREE;
1624 tree var;
1625 enum tree_code dummy_code;
1626 int dummy_int;
1627 vec<tree> dummy_vec;
1628 gimple *use_stmt;
1629 bool promotion;
1631 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1632 return NULL;
1634 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1635 return NULL;
1637 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1638 return NULL;
1640 oprnd0 = gimple_assign_rhs1 (last_stmt);
1641 oprnd1 = gimple_assign_rhs2 (last_stmt);
1642 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1643 return NULL;
1645 /* Check operand 0: it has to be defined by a type promotion. */
1646 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1647 &promotion)
1648 || !promotion)
1649 return NULL;
1651 /* Check operand 1: has to be positive. We check that it fits the type
1652 in vect_handle_widen_op_by_const (). */
1653 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1654 return NULL;
1656 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1657 type = gimple_expr_type (last_stmt);
1659 /* Check for subsequent conversion to another type. */
1660 use_stmt = vect_single_imm_use (last_stmt);
1661 if (use_stmt && is_gimple_assign (use_stmt)
1662 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1663 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1665 tree use_lhs = gimple_assign_lhs (use_stmt);
1666 tree use_type = TREE_TYPE (use_lhs);
1668 if (INTEGRAL_TYPE_P (use_type)
1669 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1671 last_stmt = use_stmt;
1672 type = use_type;
1676 /* Check if this a widening operation. */
1677 gimple *wstmt = NULL;
1678 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1679 &oprnd0, &wstmt,
1680 type, &half_type0, def_stmt0))
1681 return NULL;
1683 /* Pattern detected. */
1684 if (dump_enabled_p ())
1685 dump_printf_loc (MSG_NOTE, vect_location,
1686 "vect_recog_widen_shift_pattern: detected:\n");
1688 /* Check target support. */
1689 vectype = get_vectype_for_scalar_type (half_type0);
1690 vectype_out = get_vectype_for_scalar_type (type);
1692 if (!vectype
1693 || !vectype_out
1694 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1695 vectype_out, vectype,
1696 &dummy_code, &dummy_code,
1697 &dummy_int, &dummy_vec))
1698 return NULL;
1700 *type_in = vectype;
1701 *type_out = vectype_out;
1703 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1704 var = vect_recog_temp_ssa_var (type, NULL);
1705 pattern_stmt =
1706 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1707 if (wstmt)
1709 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1710 new_pattern_def_seq (stmt_vinfo, wstmt);
1711 stmt_vec_info new_stmt_info
1712 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1713 set_vinfo_for_stmt (wstmt, new_stmt_info);
1714 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1717 if (dump_enabled_p ())
1718 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1720 stmts->safe_push (last_stmt);
1721 return pattern_stmt;
1724 /* Detect a rotate pattern wouldn't be otherwise vectorized:
1726 type a_t, b_t, c_t;
1728 S0 a_t = b_t r<< c_t;
1730 Input/Output:
1732 * STMTS: Contains a stmt from which the pattern search begins,
1733 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1734 with a sequence:
1736 S1 d_t = -c_t;
1737 S2 e_t = d_t & (B - 1);
1738 S3 f_t = b_t << c_t;
1739 S4 g_t = b_t >> e_t;
1740 S0 a_t = f_t | g_t;
1742 where B is element bitsize of type.
1744 Output:
1746 * TYPE_IN: The type of the input arguments to the pattern.
1748 * TYPE_OUT: The type of the output of this pattern.
1750 * Return value: A new stmt that will be used to replace the rotate
1751 S0 stmt. */
1753 static gimple *
1754 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1756 gimple *last_stmt = stmts->pop ();
1757 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1758 gimple *pattern_stmt, *def_stmt;
1759 enum tree_code rhs_code;
1760 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1761 vec_info *vinfo = stmt_vinfo->vinfo;
1762 enum vect_def_type dt;
1763 optab optab1, optab2;
1764 edge ext_def = NULL;
1766 if (!is_gimple_assign (last_stmt))
1767 return NULL;
1769 rhs_code = gimple_assign_rhs_code (last_stmt);
1770 switch (rhs_code)
1772 case LROTATE_EXPR:
1773 case RROTATE_EXPR:
1774 break;
1775 default:
1776 return NULL;
1779 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1780 return NULL;
1782 lhs = gimple_assign_lhs (last_stmt);
1783 oprnd0 = gimple_assign_rhs1 (last_stmt);
1784 type = TREE_TYPE (oprnd0);
1785 oprnd1 = gimple_assign_rhs2 (last_stmt);
1786 if (TREE_CODE (oprnd0) != SSA_NAME
1787 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1788 || !INTEGRAL_TYPE_P (type)
1789 || !TYPE_UNSIGNED (type))
1790 return NULL;
1792 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1793 return NULL;
1795 if (dt != vect_internal_def
1796 && dt != vect_constant_def
1797 && dt != vect_external_def)
1798 return NULL;
1800 vectype = get_vectype_for_scalar_type (type);
1801 if (vectype == NULL_TREE)
1802 return NULL;
1804 /* If vector/vector or vector/scalar rotate is supported by the target,
1805 don't do anything here. */
1806 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1807 if (optab1
1808 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1809 return NULL;
1811 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1813 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1814 if (optab2
1815 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1816 return NULL;
1819 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1820 don't do anything here either. */
1821 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1822 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1823 if (!optab1
1824 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1825 || !optab2
1826 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1828 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1829 return NULL;
1830 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1831 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1832 if (!optab1
1833 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1834 || !optab2
1835 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1836 return NULL;
1839 *type_in = vectype;
1840 *type_out = vectype;
1841 if (*type_in == NULL_TREE)
1842 return NULL;
1844 if (dt == vect_external_def
1845 && TREE_CODE (oprnd1) == SSA_NAME
1846 && is_a <loop_vec_info> (vinfo))
1848 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1849 ext_def = loop_preheader_edge (loop);
1850 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1852 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1853 if (bb == NULL
1854 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1855 ext_def = NULL;
1859 def = NULL_TREE;
1860 if (TREE_CODE (oprnd1) == INTEGER_CST
1861 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1862 def = oprnd1;
1863 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1865 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1866 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1867 && TYPE_PRECISION (TREE_TYPE (rhs1))
1868 == TYPE_PRECISION (type))
1869 def = rhs1;
1872 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1873 if (def == NULL_TREE)
1875 def = vect_recog_temp_ssa_var (type, NULL);
1876 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1877 if (ext_def)
1879 basic_block new_bb
1880 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1881 gcc_assert (!new_bb);
1883 else
1884 append_pattern_def_seq (stmt_vinfo, def_stmt);
1886 stype = TREE_TYPE (def);
1888 if (TREE_CODE (def) == INTEGER_CST)
1890 if (!tree_fits_uhwi_p (def)
1891 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
1892 || integer_zerop (def))
1893 return NULL;
1894 def2 = build_int_cst (stype,
1895 GET_MODE_PRECISION (TYPE_MODE (type))
1896 - tree_to_uhwi (def));
1898 else
1900 tree vecstype = get_vectype_for_scalar_type (stype);
1901 stmt_vec_info def_stmt_vinfo;
1903 if (vecstype == NULL_TREE)
1904 return NULL;
1905 def2 = vect_recog_temp_ssa_var (stype, NULL);
1906 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1907 if (ext_def)
1909 basic_block new_bb
1910 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1911 gcc_assert (!new_bb);
1913 else
1915 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1916 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1917 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1918 append_pattern_def_seq (stmt_vinfo, def_stmt);
1921 def2 = vect_recog_temp_ssa_var (stype, NULL);
1922 tree mask
1923 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1924 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1925 gimple_assign_lhs (def_stmt), mask);
1926 if (ext_def)
1928 basic_block new_bb
1929 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1930 gcc_assert (!new_bb);
1932 else
1934 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1935 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1936 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1937 append_pattern_def_seq (stmt_vinfo, def_stmt);
1941 var1 = vect_recog_temp_ssa_var (type, NULL);
1942 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1943 ? LSHIFT_EXPR : RSHIFT_EXPR,
1944 oprnd0, def);
1945 append_pattern_def_seq (stmt_vinfo, def_stmt);
1947 var2 = vect_recog_temp_ssa_var (type, NULL);
1948 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1949 ? RSHIFT_EXPR : LSHIFT_EXPR,
1950 oprnd0, def2);
1951 append_pattern_def_seq (stmt_vinfo, def_stmt);
1953 /* Pattern detected. */
1954 if (dump_enabled_p ())
1955 dump_printf_loc (MSG_NOTE, vect_location,
1956 "vect_recog_rotate_pattern: detected:\n");
1958 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1959 var = vect_recog_temp_ssa_var (type, NULL);
1960 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
1962 if (dump_enabled_p ())
1963 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1965 stmts->safe_push (last_stmt);
1966 return pattern_stmt;
1969 /* Detect a vector by vector shift pattern that wouldn't be otherwise
1970 vectorized:
1972 type a_t;
1973 TYPE b_T, res_T;
1975 S1 a_t = ;
1976 S2 b_T = ;
1977 S3 res_T = b_T op a_t;
1979 where type 'TYPE' is a type with different size than 'type',
1980 and op is <<, >> or rotate.
1982 Also detect cases:
1984 type a_t;
1985 TYPE b_T, c_T, res_T;
1987 S0 c_T = ;
1988 S1 a_t = (type) c_T;
1989 S2 b_T = ;
1990 S3 res_T = b_T op a_t;
1992 Input/Output:
1994 * STMTS: Contains a stmt from which the pattern search begins,
1995 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1996 with a shift/rotate which has same type on both operands, in the
1997 second case just b_T op c_T, in the first case with added cast
1998 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2000 Output:
2002 * TYPE_IN: The type of the input arguments to the pattern.
2004 * TYPE_OUT: The type of the output of this pattern.
2006 * Return value: A new stmt that will be used to replace the shift/rotate
2007 S3 stmt. */
2009 static gimple *
2010 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2011 tree *type_in, tree *type_out)
2013 gimple *last_stmt = stmts->pop ();
2014 tree oprnd0, oprnd1, lhs, var;
2015 gimple *pattern_stmt, *def_stmt;
2016 enum tree_code rhs_code;
2017 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2018 vec_info *vinfo = stmt_vinfo->vinfo;
2019 enum vect_def_type dt;
2021 if (!is_gimple_assign (last_stmt))
2022 return NULL;
2024 rhs_code = gimple_assign_rhs_code (last_stmt);
2025 switch (rhs_code)
2027 case LSHIFT_EXPR:
2028 case RSHIFT_EXPR:
2029 case LROTATE_EXPR:
2030 case RROTATE_EXPR:
2031 break;
2032 default:
2033 return NULL;
2036 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2037 return NULL;
2039 lhs = gimple_assign_lhs (last_stmt);
2040 oprnd0 = gimple_assign_rhs1 (last_stmt);
2041 oprnd1 = gimple_assign_rhs2 (last_stmt);
2042 if (TREE_CODE (oprnd0) != SSA_NAME
2043 || TREE_CODE (oprnd1) != SSA_NAME
2044 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2045 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2046 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2047 || TYPE_PRECISION (TREE_TYPE (lhs))
2048 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2049 return NULL;
2051 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2052 return NULL;
2054 if (dt != vect_internal_def)
2055 return NULL;
2057 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2058 *type_out = *type_in;
2059 if (*type_in == NULL_TREE)
2060 return NULL;
2062 tree def = NULL_TREE;
2063 if (gimple_assign_cast_p (def_stmt))
2065 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2066 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2067 && TYPE_PRECISION (TREE_TYPE (rhs1))
2068 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2069 def = rhs1;
2072 if (def == NULL_TREE)
2074 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2075 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2076 new_pattern_def_seq (stmt_vinfo, def_stmt);
2079 /* Pattern detected. */
2080 if (dump_enabled_p ())
2081 dump_printf_loc (MSG_NOTE, vect_location,
2082 "vect_recog_vector_vector_shift_pattern: detected:\n");
2084 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2085 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2086 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2088 if (dump_enabled_p ())
2089 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2091 stmts->safe_push (last_stmt);
2092 return pattern_stmt;
2095 /* Detect multiplication by constant which are postive or negatives of power 2,
2096 and convert them to shift patterns.
2098 Mult with constants that are postive power of two.
2099 type a_t;
2100 type b_t
2101 S1: b_t = a_t * n
2105 Mult with constants that are negative power of two.
2106 S2: b_t = a_t * -n
2108 Input/Output:
2110 STMTS: Contains a stmt from which the pattern search begins,
2111 i.e. the mult stmt. Convert the mult operation to LSHIFT if
2112 constant operand is a power of 2.
2113 type a_t, b_t
2114 S1': b_t = a_t << log2 (n)
2116 Convert the mult operation to LSHIFT and followed by a NEGATE
2117 if constant operand is a negative power of 2.
2118 type a_t, b_t, res_T;
2119 S2': b_t = a_t << log2 (n)
2120 S3': res_T = - (b_t)
2122 Output:
2124 * TYPE_IN: The type of the input arguments to the pattern.
2126 * TYPE_OUT: The type of the output of this pattern.
2128 * Return value: A new stmt that will be used to replace the multiplication
2129 S1 or S2 stmt. */
2131 static gimple *
2132 vect_recog_mult_pattern (vec<gimple *> *stmts,
2133 tree *type_in, tree *type_out)
2135 gimple *last_stmt = stmts->pop ();
2136 tree oprnd0, oprnd1, vectype, itype;
2137 gimple *pattern_stmt, *def_stmt;
2138 optab optab;
2139 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2140 int power2_val, power2_neg_val;
2141 tree shift;
2143 if (!is_gimple_assign (last_stmt))
2144 return NULL;
2146 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2147 return NULL;
2149 oprnd0 = gimple_assign_rhs1 (last_stmt);
2150 oprnd1 = gimple_assign_rhs2 (last_stmt);
2151 itype = TREE_TYPE (oprnd0);
2153 if (TREE_CODE (oprnd0) != SSA_NAME
2154 || TREE_CODE (oprnd1) != INTEGER_CST
2155 || !INTEGRAL_TYPE_P (itype)
2156 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2157 return NULL;
2159 vectype = get_vectype_for_scalar_type (itype);
2160 if (vectype == NULL_TREE)
2161 return NULL;
2163 /* If the target can handle vectorized multiplication natively,
2164 don't attempt to optimize this. */
2165 optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2166 if (optab != unknown_optab)
2168 machine_mode vec_mode = TYPE_MODE (vectype);
2169 int icode = (int) optab_handler (optab, vec_mode);
2170 if (icode != CODE_FOR_nothing)
2171 return NULL;
2174 /* If target cannot handle vector left shift then we cannot
2175 optimize and bail out. */
2176 optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2177 if (!optab
2178 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2179 return NULL;
2181 power2_val = wi::exact_log2 (oprnd1);
2182 power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2184 /* Handle constant operands that are postive or negative powers of 2. */
2185 if (power2_val != -1)
2187 shift = build_int_cst (itype, power2_val);
2188 pattern_stmt
2189 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2190 LSHIFT_EXPR, oprnd0, shift);
2192 else if (power2_neg_val != -1)
2194 /* If the target cannot handle vector NEGATE then we cannot
2195 do the optimization. */
2196 optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2197 if (!optab
2198 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2199 return NULL;
2201 shift = build_int_cst (itype, power2_neg_val);
2202 def_stmt
2203 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2204 LSHIFT_EXPR, oprnd0, shift);
2205 new_pattern_def_seq (stmt_vinfo, def_stmt);
2206 pattern_stmt
2207 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2208 NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2210 else
2211 return NULL;
2213 /* Pattern detected. */
2214 if (dump_enabled_p ())
2215 dump_printf_loc (MSG_NOTE, vect_location,
2216 "vect_recog_mult_pattern: detected:\n");
2218 if (dump_enabled_p ())
2219 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2220 pattern_stmt,0);
2222 stmts->safe_push (last_stmt);
2223 *type_in = vectype;
2224 *type_out = vectype;
2226 return pattern_stmt;
2229 /* Detect a signed division by a constant that wouldn't be
2230 otherwise vectorized:
2232 type a_t, b_t;
2234 S1 a_t = b_t / N;
2236 where type 'type' is an integral type and N is a constant.
2238 Similarly handle modulo by a constant:
2240 S4 a_t = b_t % N;
2242 Input/Output:
2244 * STMTS: Contains a stmt from which the pattern search begins,
2245 i.e. the division stmt. S1 is replaced by if N is a power
2246 of two constant and type is signed:
2247 S3 y_t = b_t < 0 ? N - 1 : 0;
2248 S2 x_t = b_t + y_t;
2249 S1' a_t = x_t >> log2 (N);
2251 S4 is replaced if N is a power of two constant and
2252 type is signed by (where *_T temporaries have unsigned type):
2253 S9 y_T = b_t < 0 ? -1U : 0U;
2254 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2255 S7 z_t = (type) z_T;
2256 S6 w_t = b_t + z_t;
2257 S5 x_t = w_t & (N - 1);
2258 S4' a_t = x_t - z_t;
2260 Output:
2262 * TYPE_IN: The type of the input arguments to the pattern.
2264 * TYPE_OUT: The type of the output of this pattern.
2266 * Return value: A new stmt that will be used to replace the division
2267 S1 or modulo S4 stmt. */
2269 static gimple *
2270 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2271 tree *type_in, tree *type_out)
2273 gimple *last_stmt = stmts->pop ();
2274 tree oprnd0, oprnd1, vectype, itype, cond;
2275 gimple *pattern_stmt, *def_stmt;
2276 enum tree_code rhs_code;
2277 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2278 vec_info *vinfo = stmt_vinfo->vinfo;
2279 optab optab;
2280 tree q;
2281 int dummy_int, prec;
2282 stmt_vec_info def_stmt_vinfo;
2284 if (!is_gimple_assign (last_stmt))
2285 return NULL;
2287 rhs_code = gimple_assign_rhs_code (last_stmt);
2288 switch (rhs_code)
2290 case TRUNC_DIV_EXPR:
2291 case TRUNC_MOD_EXPR:
2292 break;
2293 default:
2294 return NULL;
2297 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2298 return NULL;
2300 oprnd0 = gimple_assign_rhs1 (last_stmt);
2301 oprnd1 = gimple_assign_rhs2 (last_stmt);
2302 itype = TREE_TYPE (oprnd0);
2303 if (TREE_CODE (oprnd0) != SSA_NAME
2304 || TREE_CODE (oprnd1) != INTEGER_CST
2305 || TREE_CODE (itype) != INTEGER_TYPE
2306 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2307 return NULL;
2309 vectype = get_vectype_for_scalar_type (itype);
2310 if (vectype == NULL_TREE)
2311 return NULL;
2313 /* If the target can handle vectorized division or modulo natively,
2314 don't attempt to optimize this. */
2315 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2316 if (optab != unknown_optab)
2318 machine_mode vec_mode = TYPE_MODE (vectype);
2319 int icode = (int) optab_handler (optab, vec_mode);
2320 if (icode != CODE_FOR_nothing)
2321 return NULL;
2324 prec = TYPE_PRECISION (itype);
2325 if (integer_pow2p (oprnd1))
2327 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2328 return NULL;
2330 /* Pattern detected. */
2331 if (dump_enabled_p ())
2332 dump_printf_loc (MSG_NOTE, vect_location,
2333 "vect_recog_divmod_pattern: detected:\n");
2335 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2336 build_int_cst (itype, 0));
2337 if (rhs_code == TRUNC_DIV_EXPR)
2339 tree var = vect_recog_temp_ssa_var (itype, NULL);
2340 tree shift;
2341 def_stmt
2342 = gimple_build_assign (var, COND_EXPR, cond,
2343 fold_build2 (MINUS_EXPR, itype, oprnd1,
2344 build_int_cst (itype, 1)),
2345 build_int_cst (itype, 0));
2346 new_pattern_def_seq (stmt_vinfo, def_stmt);
2347 var = vect_recog_temp_ssa_var (itype, NULL);
2348 def_stmt
2349 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2350 gimple_assign_lhs (def_stmt));
2351 append_pattern_def_seq (stmt_vinfo, def_stmt);
2353 shift = build_int_cst (itype, tree_log2 (oprnd1));
2354 pattern_stmt
2355 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2356 RSHIFT_EXPR, var, shift);
2358 else
2360 tree signmask;
2361 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2362 if (compare_tree_int (oprnd1, 2) == 0)
2364 signmask = vect_recog_temp_ssa_var (itype, NULL);
2365 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2366 build_int_cst (itype, 1),
2367 build_int_cst (itype, 0));
2368 append_pattern_def_seq (stmt_vinfo, def_stmt);
2370 else
2372 tree utype
2373 = build_nonstandard_integer_type (prec, 1);
2374 tree vecutype = get_vectype_for_scalar_type (utype);
2375 tree shift
2376 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2377 - tree_log2 (oprnd1));
2378 tree var = vect_recog_temp_ssa_var (utype, NULL);
2380 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2381 build_int_cst (utype, -1),
2382 build_int_cst (utype, 0));
2383 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2384 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2385 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2386 append_pattern_def_seq (stmt_vinfo, def_stmt);
2387 var = vect_recog_temp_ssa_var (utype, NULL);
2388 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2389 gimple_assign_lhs (def_stmt),
2390 shift);
2391 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2392 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2393 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2394 append_pattern_def_seq (stmt_vinfo, def_stmt);
2395 signmask = vect_recog_temp_ssa_var (itype, NULL);
2396 def_stmt
2397 = gimple_build_assign (signmask, NOP_EXPR, var);
2398 append_pattern_def_seq (stmt_vinfo, def_stmt);
2400 def_stmt
2401 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2402 PLUS_EXPR, oprnd0, signmask);
2403 append_pattern_def_seq (stmt_vinfo, def_stmt);
2404 def_stmt
2405 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2406 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2407 fold_build2 (MINUS_EXPR, itype, oprnd1,
2408 build_int_cst (itype, 1)));
2409 append_pattern_def_seq (stmt_vinfo, def_stmt);
2411 pattern_stmt
2412 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2413 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2414 signmask);
2417 if (dump_enabled_p ())
2418 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2421 stmts->safe_push (last_stmt);
2423 *type_in = vectype;
2424 *type_out = vectype;
2425 return pattern_stmt;
2428 if (prec > HOST_BITS_PER_WIDE_INT
2429 || integer_zerop (oprnd1))
2430 return NULL;
2432 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2433 return NULL;
2435 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2437 if (TYPE_UNSIGNED (itype))
2439 unsigned HOST_WIDE_INT mh, ml;
2440 int pre_shift, post_shift;
2441 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2442 & GET_MODE_MASK (TYPE_MODE (itype)));
2443 tree t1, t2, t3, t4;
2445 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2446 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2447 return NULL;
2449 /* Find a suitable multiplier and right shift count
2450 instead of multiplying with D. */
2451 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2453 /* If the suggested multiplier is more than SIZE bits, we can do better
2454 for even divisors, using an initial right shift. */
2455 if (mh != 0 && (d & 1) == 0)
2457 pre_shift = floor_log2 (d & -d);
2458 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2459 &ml, &post_shift, &dummy_int);
2460 gcc_assert (!mh);
2462 else
2463 pre_shift = 0;
2465 if (mh != 0)
2467 if (post_shift - 1 >= prec)
2468 return NULL;
2470 /* t1 = oprnd0 h* ml;
2471 t2 = oprnd0 - t1;
2472 t3 = t2 >> 1;
2473 t4 = t1 + t3;
2474 q = t4 >> (post_shift - 1); */
2475 t1 = vect_recog_temp_ssa_var (itype, NULL);
2476 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2477 build_int_cst (itype, ml));
2478 append_pattern_def_seq (stmt_vinfo, def_stmt);
2480 t2 = vect_recog_temp_ssa_var (itype, NULL);
2481 def_stmt
2482 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2483 append_pattern_def_seq (stmt_vinfo, def_stmt);
2485 t3 = vect_recog_temp_ssa_var (itype, NULL);
2486 def_stmt
2487 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2488 append_pattern_def_seq (stmt_vinfo, def_stmt);
2490 t4 = vect_recog_temp_ssa_var (itype, NULL);
2491 def_stmt
2492 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2494 if (post_shift != 1)
2496 append_pattern_def_seq (stmt_vinfo, def_stmt);
2498 q = vect_recog_temp_ssa_var (itype, NULL);
2499 pattern_stmt
2500 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2501 build_int_cst (itype, post_shift - 1));
2503 else
2505 q = t4;
2506 pattern_stmt = def_stmt;
2509 else
2511 if (pre_shift >= prec || post_shift >= prec)
2512 return NULL;
2514 /* t1 = oprnd0 >> pre_shift;
2515 t2 = t1 h* ml;
2516 q = t2 >> post_shift; */
2517 if (pre_shift)
2519 t1 = vect_recog_temp_ssa_var (itype, NULL);
2520 def_stmt
2521 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2522 build_int_cst (NULL, pre_shift));
2523 append_pattern_def_seq (stmt_vinfo, def_stmt);
2525 else
2526 t1 = oprnd0;
2528 t2 = vect_recog_temp_ssa_var (itype, NULL);
2529 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2530 build_int_cst (itype, ml));
2532 if (post_shift)
2534 append_pattern_def_seq (stmt_vinfo, def_stmt);
2536 q = vect_recog_temp_ssa_var (itype, NULL);
2537 def_stmt
2538 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2539 build_int_cst (itype, post_shift));
2541 else
2542 q = t2;
2544 pattern_stmt = def_stmt;
2547 else
2549 unsigned HOST_WIDE_INT ml;
2550 int post_shift;
2551 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2552 unsigned HOST_WIDE_INT abs_d;
2553 bool add = false;
2554 tree t1, t2, t3, t4;
2556 /* Give up for -1. */
2557 if (d == -1)
2558 return NULL;
2560 /* Since d might be INT_MIN, we have to cast to
2561 unsigned HOST_WIDE_INT before negating to avoid
2562 undefined signed overflow. */
2563 abs_d = (d >= 0
2564 ? (unsigned HOST_WIDE_INT) d
2565 : - (unsigned HOST_WIDE_INT) d);
2567 /* n rem d = n rem -d */
2568 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2570 d = abs_d;
2571 oprnd1 = build_int_cst (itype, abs_d);
2573 else if (HOST_BITS_PER_WIDE_INT >= prec
2574 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2575 /* This case is not handled correctly below. */
2576 return NULL;
2578 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2579 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2581 add = true;
2582 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2584 if (post_shift >= prec)
2585 return NULL;
2587 /* t1 = oprnd0 h* ml; */
2588 t1 = vect_recog_temp_ssa_var (itype, NULL);
2589 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2590 build_int_cst (itype, ml));
2592 if (add)
2594 /* t2 = t1 + oprnd0; */
2595 append_pattern_def_seq (stmt_vinfo, def_stmt);
2596 t2 = vect_recog_temp_ssa_var (itype, NULL);
2597 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2599 else
2600 t2 = t1;
2602 if (post_shift)
2604 /* t3 = t2 >> post_shift; */
2605 append_pattern_def_seq (stmt_vinfo, def_stmt);
2606 t3 = vect_recog_temp_ssa_var (itype, NULL);
2607 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2608 build_int_cst (itype, post_shift));
2610 else
2611 t3 = t2;
2613 wide_int oprnd0_min, oprnd0_max;
2614 int msb = 1;
2615 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2617 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2618 msb = 0;
2619 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2620 msb = -1;
2623 if (msb == 0 && d >= 0)
2625 /* q = t3; */
2626 q = t3;
2627 pattern_stmt = def_stmt;
2629 else
2631 /* t4 = oprnd0 >> (prec - 1);
2632 or if we know from VRP that oprnd0 >= 0
2633 t4 = 0;
2634 or if we know from VRP that oprnd0 < 0
2635 t4 = -1; */
2636 append_pattern_def_seq (stmt_vinfo, def_stmt);
2637 t4 = vect_recog_temp_ssa_var (itype, NULL);
2638 if (msb != 1)
2639 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2640 build_int_cst (itype, msb));
2641 else
2642 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2643 build_int_cst (itype, prec - 1));
2644 append_pattern_def_seq (stmt_vinfo, def_stmt);
2646 /* q = t3 - t4; or q = t4 - t3; */
2647 q = vect_recog_temp_ssa_var (itype, NULL);
2648 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2649 d < 0 ? t3 : t4);
2653 if (rhs_code == TRUNC_MOD_EXPR)
2655 tree r, t1;
2657 /* We divided. Now finish by:
2658 t1 = q * oprnd1;
2659 r = oprnd0 - t1; */
2660 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2662 t1 = vect_recog_temp_ssa_var (itype, NULL);
2663 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2664 append_pattern_def_seq (stmt_vinfo, def_stmt);
2666 r = vect_recog_temp_ssa_var (itype, NULL);
2667 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2670 /* Pattern detected. */
2671 if (dump_enabled_p ())
2673 dump_printf_loc (MSG_NOTE, vect_location,
2674 "vect_recog_divmod_pattern: detected: ");
2675 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
2676 dump_printf (MSG_NOTE, "\n");
2679 stmts->safe_push (last_stmt);
2681 *type_in = vectype;
2682 *type_out = vectype;
2683 return pattern_stmt;
2686 /* Function vect_recog_mixed_size_cond_pattern
2688 Try to find the following pattern:
2690 type x_t, y_t;
2691 TYPE a_T, b_T, c_T;
2692 loop:
2693 S1 a_T = x_t CMP y_t ? b_T : c_T;
2695 where type 'TYPE' is an integral type which has different size
2696 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
2697 than 'type', the constants need to fit into an integer type
2698 with the same width as 'type') or results of conversion from 'type'.
2700 Input:
2702 * LAST_STMT: A stmt from which the pattern search begins.
2704 Output:
2706 * TYPE_IN: The type of the input arguments to the pattern.
2708 * TYPE_OUT: The type of the output of this pattern.
2710 * Return value: A new stmt that will be used to replace the pattern.
2711 Additionally a def_stmt is added.
2713 a_it = x_t CMP y_t ? b_it : c_it;
2714 a_T = (TYPE) a_it; */
2716 static gimple *
2717 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
2718 tree *type_out)
2720 gimple *last_stmt = (*stmts)[0];
2721 tree cond_expr, then_clause, else_clause;
2722 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
2723 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
2724 gimple *pattern_stmt, *def_stmt;
2725 vec_info *vinfo = stmt_vinfo->vinfo;
2726 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2727 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
2728 bool promotion;
2729 tree comp_scalar_type;
2731 if (!is_gimple_assign (last_stmt)
2732 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2733 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2734 return NULL;
2736 cond_expr = gimple_assign_rhs1 (last_stmt);
2737 then_clause = gimple_assign_rhs2 (last_stmt);
2738 else_clause = gimple_assign_rhs3 (last_stmt);
2740 if (!COMPARISON_CLASS_P (cond_expr))
2741 return NULL;
2743 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2744 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
2745 if (comp_vectype == NULL_TREE)
2746 return NULL;
2748 type = gimple_expr_type (last_stmt);
2749 if (types_compatible_p (type, comp_scalar_type)
2750 || ((TREE_CODE (then_clause) != INTEGER_CST
2751 || TREE_CODE (else_clause) != INTEGER_CST)
2752 && !INTEGRAL_TYPE_P (comp_scalar_type))
2753 || !INTEGRAL_TYPE_P (type))
2754 return NULL;
2756 if ((TREE_CODE (then_clause) != INTEGER_CST
2757 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2758 &def_stmt0, &promotion))
2759 || (TREE_CODE (else_clause) != INTEGER_CST
2760 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2761 &def_stmt1, &promotion)))
2762 return NULL;
2764 if (orig_type0 && orig_type1
2765 && !types_compatible_p (orig_type0, orig_type1))
2766 return NULL;
2768 if (orig_type0)
2770 if (!types_compatible_p (orig_type0, comp_scalar_type))
2771 return NULL;
2772 then_clause = gimple_assign_rhs1 (def_stmt0);
2773 itype = orig_type0;
2776 if (orig_type1)
2778 if (!types_compatible_p (orig_type1, comp_scalar_type))
2779 return NULL;
2780 else_clause = gimple_assign_rhs1 (def_stmt1);
2781 itype = orig_type1;
2785 HOST_WIDE_INT cmp_mode_size
2786 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2788 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
2789 return NULL;
2791 vectype = get_vectype_for_scalar_type (type);
2792 if (vectype == NULL_TREE)
2793 return NULL;
2795 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2796 return NULL;
2798 if (itype == NULL_TREE)
2799 itype = build_nonstandard_integer_type (cmp_mode_size,
2800 TYPE_UNSIGNED (type));
2802 if (itype == NULL_TREE
2803 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
2804 return NULL;
2806 vecitype = get_vectype_for_scalar_type (itype);
2807 if (vecitype == NULL_TREE)
2808 return NULL;
2810 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2811 return NULL;
2813 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
2815 if ((TREE_CODE (then_clause) == INTEGER_CST
2816 && !int_fits_type_p (then_clause, itype))
2817 || (TREE_CODE (else_clause) == INTEGER_CST
2818 && !int_fits_type_p (else_clause, itype)))
2819 return NULL;
2822 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2823 COND_EXPR, unshare_expr (cond_expr),
2824 fold_convert (itype, then_clause),
2825 fold_convert (itype, else_clause));
2826 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2827 NOP_EXPR, gimple_assign_lhs (def_stmt));
2829 new_pattern_def_seq (stmt_vinfo, def_stmt);
2830 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
2831 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2832 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2833 *type_in = vecitype;
2834 *type_out = vectype;
2836 if (dump_enabled_p ())
2837 dump_printf_loc (MSG_NOTE, vect_location,
2838 "vect_recog_mixed_size_cond_pattern: detected:\n");
2840 return pattern_stmt;
2844 /* Helper function of vect_recog_bool_pattern. Called recursively, return
2845 true if bool VAR can and should be optimized that way. Assume it shouldn't
2846 in case it's a result of a comparison which can be directly vectorized into
2847 a vector comparison. */
2849 static bool
2850 check_bool_pattern (tree var, vec_info *vinfo)
2852 gimple *def_stmt;
2853 enum vect_def_type dt;
2854 tree rhs1;
2855 enum tree_code rhs_code;
2857 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
2858 return false;
2860 if (dt != vect_internal_def)
2861 return false;
2863 if (!is_gimple_assign (def_stmt))
2864 return false;
2866 if (!has_single_use (var))
2867 return false;
2869 rhs1 = gimple_assign_rhs1 (def_stmt);
2870 rhs_code = gimple_assign_rhs_code (def_stmt);
2871 switch (rhs_code)
2873 case SSA_NAME:
2874 return check_bool_pattern (rhs1, vinfo);
2876 CASE_CONVERT:
2877 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2878 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2879 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2880 return false;
2881 return check_bool_pattern (rhs1, vinfo);
2883 case BIT_NOT_EXPR:
2884 return check_bool_pattern (rhs1, vinfo);
2886 case BIT_AND_EXPR:
2887 case BIT_IOR_EXPR:
2888 case BIT_XOR_EXPR:
2889 if (!check_bool_pattern (rhs1, vinfo))
2890 return false;
2891 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
2893 default:
2894 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2896 tree vecitype, comp_vectype, mask_type;
2898 /* If the comparison can throw, then is_gimple_condexpr will be
2899 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2900 if (stmt_could_throw_p (def_stmt))
2901 return false;
2903 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2904 if (comp_vectype == NULL_TREE)
2905 return false;
2907 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
2908 if (mask_type
2909 && expand_vec_cmp_expr_p (comp_vectype, mask_type))
2910 return false;
2912 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2914 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2915 tree itype
2916 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
2917 vecitype = get_vectype_for_scalar_type (itype);
2918 if (vecitype == NULL_TREE)
2919 return false;
2921 else
2922 vecitype = comp_vectype;
2923 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2925 return false;
2930 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2931 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
2932 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
2934 static tree
2935 adjust_bool_pattern_cast (tree type, tree var)
2937 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2938 gimple *cast_stmt, *pattern_stmt;
2940 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
2941 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2942 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
2943 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2944 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
2945 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2946 return gimple_assign_lhs (cast_stmt);
2950 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2951 recursively. VAR is an SSA_NAME that should be transformed from bool
2952 to a wider integer type, OUT_TYPE is the desired final integer type of
2953 the whole pattern, TRUEVAL should be NULL unless optimizing
2954 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2955 in the then_clause, STMTS is where statements with added pattern stmts
2956 should be pushed to. */
2958 static tree
2959 adjust_bool_pattern (tree var, tree out_type, tree trueval,
2960 vec<gimple *> *stmts)
2962 gimple *stmt = SSA_NAME_DEF_STMT (var);
2963 enum tree_code rhs_code, def_rhs_code;
2964 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2965 location_t loc;
2966 gimple *pattern_stmt, *def_stmt;
2968 rhs1 = gimple_assign_rhs1 (stmt);
2969 rhs2 = gimple_assign_rhs2 (stmt);
2970 rhs_code = gimple_assign_rhs_code (stmt);
2971 loc = gimple_location (stmt);
2972 switch (rhs_code)
2974 case SSA_NAME:
2975 CASE_CONVERT:
2976 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2977 itype = TREE_TYPE (irhs1);
2978 pattern_stmt
2979 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2980 SSA_NAME, irhs1);
2981 break;
2983 case BIT_NOT_EXPR:
2984 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2985 itype = TREE_TYPE (irhs1);
2986 pattern_stmt
2987 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2988 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
2989 break;
2991 case BIT_AND_EXPR:
2992 /* Try to optimize x = y & (a < b ? 1 : 0); into
2993 x = (a < b ? y : 0);
2995 E.g. for:
2996 bool a_b, b_b, c_b;
2997 TYPE d_T;
2999 S1 a_b = x1 CMP1 y1;
3000 S2 b_b = x2 CMP2 y2;
3001 S3 c_b = a_b & b_b;
3002 S4 d_T = (TYPE) c_b;
3004 we would normally emit:
3006 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3007 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3008 S3' c_T = a_T & b_T;
3009 S4' d_T = c_T;
3011 but we can save one stmt by using the
3012 result of one of the COND_EXPRs in the other COND_EXPR and leave
3013 BIT_AND_EXPR stmt out:
3015 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3016 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3017 S4' f_T = c_T;
3019 At least when VEC_COND_EXPR is implemented using masks
3020 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3021 computes the comparison masks and ands it, in one case with
3022 all ones vector, in the other case with a vector register.
3023 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3024 often more expensive. */
3025 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3026 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3027 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3029 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3030 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3031 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3032 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3034 gimple *tstmt;
3035 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3036 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
3037 tstmt = stmts->pop ();
3038 gcc_assert (tstmt == def_stmt);
3039 stmts->quick_push (stmt);
3040 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3041 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3042 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3043 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3044 return irhs2;
3046 else
3047 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3048 goto and_ior_xor;
3050 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3051 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3052 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3054 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3055 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3056 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3057 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3059 gimple *tstmt;
3060 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3061 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
3062 tstmt = stmts->pop ();
3063 gcc_assert (tstmt == def_stmt);
3064 stmts->quick_push (stmt);
3065 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3066 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
3067 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
3068 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3069 return irhs1;
3071 else
3072 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3073 goto and_ior_xor;
3075 /* FALLTHRU */
3076 case BIT_IOR_EXPR:
3077 case BIT_XOR_EXPR:
3078 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3079 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3080 and_ior_xor:
3081 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3082 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3084 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3085 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3086 int out_prec = TYPE_PRECISION (out_type);
3087 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3088 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3089 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3090 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3091 else
3093 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3094 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3097 itype = TREE_TYPE (irhs1);
3098 pattern_stmt
3099 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3100 rhs_code, irhs1, irhs2);
3101 break;
3103 default:
3104 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3105 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3106 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3107 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3108 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3110 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3111 itype
3112 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3114 else
3115 itype = TREE_TYPE (rhs1);
3116 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3117 if (trueval == NULL_TREE)
3118 trueval = build_int_cst (itype, 1);
3119 else
3120 gcc_checking_assert (useless_type_conversion_p (itype,
3121 TREE_TYPE (trueval)));
3122 pattern_stmt
3123 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3124 COND_EXPR, cond_expr, trueval,
3125 build_int_cst (itype, 0));
3126 break;
3129 stmts->safe_push (stmt);
3130 gimple_set_location (pattern_stmt, loc);
3131 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3132 return gimple_assign_lhs (pattern_stmt);
3136 /* Return the proper type for converting bool VAR into
3137 an integer value or NULL_TREE if no such type exists.
3138 The type is chosen so that converted value has the
3139 same number of elements as VAR's vector type. */
3141 static tree
3142 search_type_for_mask (tree var, vec_info *vinfo)
3144 gimple *def_stmt;
3145 enum vect_def_type dt;
3146 tree rhs1;
3147 enum tree_code rhs_code;
3148 tree res = NULL_TREE, res2;
3150 if (TREE_CODE (var) != SSA_NAME)
3151 return NULL_TREE;
3153 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3154 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3155 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3156 return NULL_TREE;
3158 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3159 return NULL_TREE;
3161 if (dt != vect_internal_def)
3162 return NULL_TREE;
3164 if (!is_gimple_assign (def_stmt))
3165 return NULL_TREE;
3167 rhs_code = gimple_assign_rhs_code (def_stmt);
3168 rhs1 = gimple_assign_rhs1 (def_stmt);
3170 switch (rhs_code)
3172 case SSA_NAME:
3173 case BIT_NOT_EXPR:
3174 CASE_CONVERT:
3175 res = search_type_for_mask (rhs1, vinfo);
3176 break;
3178 case BIT_AND_EXPR:
3179 case BIT_IOR_EXPR:
3180 case BIT_XOR_EXPR:
3181 res = search_type_for_mask (rhs1, vinfo);
3182 res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
3183 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3184 res = res2;
3185 break;
3187 default:
3188 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3190 tree comp_vectype, mask_type;
3192 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3193 if (comp_vectype == NULL_TREE)
3194 return NULL_TREE;
3196 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3197 if (!mask_type
3198 || !expand_vec_cmp_expr_p (comp_vectype, mask_type))
3199 return NULL_TREE;
3201 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3202 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3204 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3205 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3207 else
3208 res = TREE_TYPE (rhs1);
3212 return res;
3216 /* Function vect_recog_bool_pattern
3218 Try to find pattern like following:
3220 bool a_b, b_b, c_b, d_b, e_b;
3221 TYPE f_T;
3222 loop:
3223 S1 a_b = x1 CMP1 y1;
3224 S2 b_b = x2 CMP2 y2;
3225 S3 c_b = a_b & b_b;
3226 S4 d_b = x3 CMP3 y3;
3227 S5 e_b = c_b | d_b;
3228 S6 f_T = (TYPE) e_b;
3230 where type 'TYPE' is an integral type. Or a similar pattern
3231 ending in
3233 S6 f_Y = e_b ? r_Y : s_Y;
3235 as results from if-conversion of a complex condition.
3237 Input:
3239 * LAST_STMT: A stmt at the end from which the pattern
3240 search begins, i.e. cast of a bool to
3241 an integer type.
3243 Output:
3245 * TYPE_IN: The type of the input arguments to the pattern.
3247 * TYPE_OUT: The type of the output of this pattern.
3249 * Return value: A new stmt that will be used to replace the pattern.
3251 Assuming size of TYPE is the same as size of all comparisons
3252 (otherwise some casts would be added where needed), the above
3253 sequence we create related pattern stmts:
3254 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3255 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3256 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3257 S5' e_T = c_T | d_T;
3258 S6' f_T = e_T;
3260 Instead of the above S3' we could emit:
3261 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3262 S3' c_T = a_T | b_T;
3263 but the above is more efficient. */
3265 static gimple *
3266 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3267 tree *type_out)
3269 gimple *last_stmt = stmts->pop ();
3270 enum tree_code rhs_code;
3271 tree var, lhs, rhs, vectype;
3272 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3273 stmt_vec_info new_stmt_info;
3274 vec_info *vinfo = stmt_vinfo->vinfo;
3275 gimple *pattern_stmt;
3277 if (!is_gimple_assign (last_stmt))
3278 return NULL;
3280 var = gimple_assign_rhs1 (last_stmt);
3281 lhs = gimple_assign_lhs (last_stmt);
3283 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3284 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3285 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3286 return NULL;
3288 rhs_code = gimple_assign_rhs_code (last_stmt);
3289 if (CONVERT_EXPR_CODE_P (rhs_code))
3291 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3292 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3293 return NULL;
3294 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3295 if (vectype == NULL_TREE)
3296 return NULL;
3298 if (check_bool_pattern (var, vinfo))
3300 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3301 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3302 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3303 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3304 else
3305 pattern_stmt
3306 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3308 else
3310 tree type = search_type_for_mask (var, vinfo);
3311 tree cst0, cst1, tmp;
3313 if (!type)
3314 return NULL;
3316 /* We may directly use cond with narrowed type to avoid
3317 multiple cond exprs with following result packing and
3318 perform single cond with packed mask instead. In case
3319 of widening we better make cond first and then extract
3320 results. */
3321 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3322 type = TREE_TYPE (lhs);
3324 cst0 = build_int_cst (type, 0);
3325 cst1 = build_int_cst (type, 1);
3326 tmp = vect_recog_temp_ssa_var (type, NULL);
3327 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3329 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3331 tree new_vectype = get_vectype_for_scalar_type (type);
3332 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3333 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3334 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3335 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3337 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3338 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3342 *type_out = vectype;
3343 *type_in = vectype;
3344 stmts->safe_push (last_stmt);
3345 if (dump_enabled_p ())
3346 dump_printf_loc (MSG_NOTE, vect_location,
3347 "vect_recog_bool_pattern: detected:\n");
3349 return pattern_stmt;
3351 else if (rhs_code == COND_EXPR
3352 && TREE_CODE (var) == SSA_NAME)
3354 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3355 if (vectype == NULL_TREE)
3356 return NULL;
3358 /* Build a scalar type for the boolean result that when
3359 vectorized matches the vector type of the result in
3360 size and number of elements. */
3361 unsigned prec
3362 = wi::udiv_trunc (TYPE_SIZE (vectype),
3363 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3364 tree type
3365 = build_nonstandard_integer_type (prec,
3366 TYPE_UNSIGNED (TREE_TYPE (var)));
3367 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3368 return NULL;
3370 if (!check_bool_pattern (var, vinfo))
3371 return NULL;
3373 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3375 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3376 pattern_stmt
3377 = gimple_build_assign (lhs, COND_EXPR,
3378 build2 (NE_EXPR, boolean_type_node,
3379 rhs, build_int_cst (type, 0)),
3380 gimple_assign_rhs2 (last_stmt),
3381 gimple_assign_rhs3 (last_stmt));
3382 *type_out = vectype;
3383 *type_in = vectype;
3384 stmts->safe_push (last_stmt);
3385 if (dump_enabled_p ())
3386 dump_printf_loc (MSG_NOTE, vect_location,
3387 "vect_recog_bool_pattern: detected:\n");
3389 return pattern_stmt;
3391 else if (rhs_code == SSA_NAME
3392 && STMT_VINFO_DATA_REF (stmt_vinfo))
3394 stmt_vec_info pattern_stmt_info;
3395 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3396 gcc_assert (vectype != NULL_TREE);
3397 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3398 return NULL;
3400 if (check_bool_pattern (var, vinfo))
3401 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype),
3402 NULL_TREE, stmts);
3403 else
3405 tree type = search_type_for_mask (var, vinfo);
3406 tree cst0, cst1, new_vectype;
3408 if (!type)
3409 return NULL;
3411 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3412 type = TREE_TYPE (vectype);
3414 cst0 = build_int_cst (type, 0);
3415 cst1 = build_int_cst (type, 1);
3416 new_vectype = get_vectype_for_scalar_type (type);
3418 rhs = vect_recog_temp_ssa_var (type, NULL);
3419 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3421 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3422 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3423 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3424 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3427 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3428 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3430 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3431 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3432 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3433 rhs = rhs2;
3435 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3436 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3437 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3438 STMT_VINFO_DATA_REF (pattern_stmt_info)
3439 = STMT_VINFO_DATA_REF (stmt_vinfo);
3440 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3441 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3442 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3443 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3444 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3445 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3446 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3447 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3448 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3449 *type_out = vectype;
3450 *type_in = vectype;
3451 stmts->safe_push (last_stmt);
3452 if (dump_enabled_p ())
3453 dump_printf_loc (MSG_NOTE, vect_location,
3454 "vect_recog_bool_pattern: detected:\n");
3455 return pattern_stmt;
3457 else
3458 return NULL;
3462 /* A helper for vect_recog_mask_conversion_pattern. Build
3463 conversion of MASK to a type suitable for masking VECTYPE.
3464 Built statement gets required vectype and is appended to
3465 a pattern sequence of STMT_VINFO.
3467 Return converted mask. */
3469 static tree
3470 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3471 vec_info *vinfo)
3473 gimple *stmt;
3474 tree masktype, tmp;
3475 stmt_vec_info new_stmt_info;
3477 masktype = build_same_sized_truth_vector_type (vectype);
3478 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3479 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3480 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3481 set_vinfo_for_stmt (stmt, new_stmt_info);
3482 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3483 append_pattern_def_seq (stmt_vinfo, stmt);
3485 return tmp;
3489 /* Function vect_recog_mask_conversion_pattern
3491 Try to find statements which require boolean type
3492 converison. Additional conversion statements are
3493 added to handle such cases. For example:
3495 bool m_1, m_2, m_3;
3496 int i_4, i_5;
3497 double d_6, d_7;
3498 char c_1, c_2, c_3;
3500 S1 m_1 = i_4 > i_5;
3501 S2 m_2 = d_6 < d_7;
3502 S3 m_3 = m_1 & m_2;
3503 S4 c_1 = m_3 ? c_2 : c_3;
3505 Will be transformed into:
3507 S1 m_1 = i_4 > i_5;
3508 S2 m_2 = d_6 < d_7;
3509 S3'' m_2' = (_Bool[bitsize=32])m_2
3510 S3' m_3' = m_1 & m_2';
3511 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3512 S4' c_1' = m_3'' ? c_2 : c_3; */
3514 static gimple *
3515 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3516 tree *type_out)
3518 gimple *last_stmt = stmts->pop ();
3519 enum tree_code rhs_code;
3520 tree lhs, rhs1, rhs2, tmp, rhs1_type, rhs2_type, vectype1, vectype2;
3521 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3522 stmt_vec_info pattern_stmt_info;
3523 vec_info *vinfo = stmt_vinfo->vinfo;
3524 gimple *pattern_stmt;
3526 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3527 if (is_gimple_call (last_stmt)
3528 && gimple_call_internal_p (last_stmt)
3529 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3530 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3532 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3534 if (load)
3536 lhs = gimple_call_lhs (last_stmt);
3537 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3539 else
3541 rhs2 = gimple_call_arg (last_stmt, 3);
3542 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3545 rhs1 = gimple_call_arg (last_stmt, 2);
3546 rhs1_type = search_type_for_mask (rhs1, vinfo);
3547 if (!rhs1_type)
3548 return NULL;
3549 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3551 if (!vectype1 || !vectype2
3552 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3553 return NULL;
3555 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3557 if (load)
3559 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3560 pattern_stmt
3561 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3562 gimple_call_arg (last_stmt, 0),
3563 gimple_call_arg (last_stmt, 1),
3564 tmp);
3565 gimple_call_set_lhs (pattern_stmt, lhs);
3567 else
3568 pattern_stmt
3569 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3570 gimple_call_arg (last_stmt, 0),
3571 gimple_call_arg (last_stmt, 1),
3572 tmp,
3573 gimple_call_arg (last_stmt, 3));
3576 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3577 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3578 STMT_VINFO_DATA_REF (pattern_stmt_info)
3579 = STMT_VINFO_DATA_REF (stmt_vinfo);
3580 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3581 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3582 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3583 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3584 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3585 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3586 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3587 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3588 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3590 *type_out = vectype1;
3591 *type_in = vectype1;
3592 stmts->safe_push (last_stmt);
3593 if (dump_enabled_p ())
3594 dump_printf_loc (MSG_NOTE, vect_location,
3595 "vect_recog_mask_conversion_pattern: detected:\n");
3597 return pattern_stmt;
3600 if (!is_gimple_assign (last_stmt))
3601 return NULL;
3603 lhs = gimple_assign_lhs (last_stmt);
3604 rhs1 = gimple_assign_rhs1 (last_stmt);
3605 rhs_code = gimple_assign_rhs_code (last_stmt);
3607 /* Check for cond expression requiring mask conversion. */
3608 if (rhs_code == COND_EXPR)
3610 /* vect_recog_mixed_size_cond_pattern could apply.
3611 Do nothing then. */
3612 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3613 return NULL;
3615 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3617 if (TREE_CODE (rhs1) == SSA_NAME)
3619 rhs1_type = search_type_for_mask (rhs1, vinfo);
3620 if (!rhs1_type)
3621 return NULL;
3623 else
3624 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3626 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3628 if (!vectype1 || !vectype2
3629 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3630 return NULL;
3632 /* If rhs1 is a comparison we need to move it into a
3633 separate statement. */
3634 if (TREE_CODE (rhs1) != SSA_NAME)
3636 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3637 pattern_stmt = gimple_build_assign (tmp, rhs1);
3638 rhs1 = tmp;
3640 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3641 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3642 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
3643 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3646 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3648 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3649 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
3650 gimple_assign_rhs2 (last_stmt),
3651 gimple_assign_rhs3 (last_stmt));
3653 *type_out = vectype1;
3654 *type_in = vectype1;
3655 stmts->safe_push (last_stmt);
3656 if (dump_enabled_p ())
3657 dump_printf_loc (MSG_NOTE, vect_location,
3658 "vect_recog_mask_conversion_pattern: detected:\n");
3660 return pattern_stmt;
3663 /* Now check for binary boolean operations requiring conversion for
3664 one of operands. */
3665 if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
3666 return NULL;
3668 if (rhs_code != BIT_IOR_EXPR
3669 && rhs_code != BIT_XOR_EXPR
3670 && rhs_code != BIT_AND_EXPR)
3671 return NULL;
3673 rhs2 = gimple_assign_rhs2 (last_stmt);
3675 rhs1_type = search_type_for_mask (rhs1, vinfo);
3676 rhs2_type = search_type_for_mask (rhs2, vinfo);
3678 if (!rhs1_type || !rhs2_type
3679 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
3680 return NULL;
3682 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
3684 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
3685 if (!vectype1)
3686 return NULL;
3687 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
3689 else
3691 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
3692 if (!vectype1)
3693 return NULL;
3694 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3697 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3698 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
3700 *type_out = vectype1;
3701 *type_in = vectype1;
3702 stmts->safe_push (last_stmt);
3703 if (dump_enabled_p ())
3704 dump_printf_loc (MSG_NOTE, vect_location,
3705 "vect_recog_mask_conversion_pattern: detected:\n");
3707 return pattern_stmt;
3711 /* Mark statements that are involved in a pattern. */
3713 static inline void
3714 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
3715 tree pattern_vectype)
3717 stmt_vec_info pattern_stmt_info, def_stmt_info;
3718 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3719 vec_info *vinfo = orig_stmt_info->vinfo;
3720 gimple *def_stmt;
3722 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
3723 if (pattern_stmt_info == NULL)
3725 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3726 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3728 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
3730 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3731 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
3732 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3733 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3734 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3735 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
3736 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3737 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3738 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
3740 gimple_stmt_iterator si;
3741 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3742 !gsi_end_p (si); gsi_next (&si))
3744 def_stmt = gsi_stmt (si);
3745 def_stmt_info = vinfo_for_stmt (def_stmt);
3746 if (def_stmt_info == NULL)
3748 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3749 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3751 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3752 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3753 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
3754 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3755 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
3760 /* Function vect_pattern_recog_1
3762 Input:
3763 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3764 computation pattern.
3765 STMT: A stmt from which the pattern search should start.
3767 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
3768 expression that computes the same functionality and can be used to
3769 replace the sequence of stmts that are involved in the pattern.
3771 Output:
3772 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3773 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3774 relevant vector type. If 'TYPE_IN' is already a vector type, then this
3775 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3776 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3777 to the available target pattern.
3779 This function also does some bookkeeping, as explained in the documentation
3780 for vect_recog_pattern. */
3782 static void
3783 vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3784 gimple_stmt_iterator si,
3785 vec<gimple *> *stmts_to_replace)
3787 gimple *stmt = gsi_stmt (si), *pattern_stmt;
3788 stmt_vec_info stmt_info;
3789 loop_vec_info loop_vinfo;
3790 tree pattern_vectype;
3791 tree type_in, type_out;
3792 enum tree_code code;
3793 int i;
3794 gimple *next;
3796 stmts_to_replace->truncate (0);
3797 stmts_to_replace->quick_push (stmt);
3798 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
3799 if (!pattern_stmt)
3800 return;
3802 stmt = stmts_to_replace->last ();
3803 stmt_info = vinfo_for_stmt (stmt);
3804 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3806 if (VECTOR_BOOLEAN_TYPE_P (type_in)
3807 || VECTOR_MODE_P (TYPE_MODE (type_in)))
3809 /* No need to check target support (already checked by the pattern
3810 recognition function). */
3811 pattern_vectype = type_out ? type_out : type_in;
3813 else
3815 machine_mode vec_mode;
3816 enum insn_code icode;
3817 optab optab;
3819 /* Check target support */
3820 type_in = get_vectype_for_scalar_type (type_in);
3821 if (!type_in)
3822 return;
3823 if (type_out)
3824 type_out = get_vectype_for_scalar_type (type_out);
3825 else
3826 type_out = type_in;
3827 if (!type_out)
3828 return;
3829 pattern_vectype = type_out;
3831 if (is_gimple_assign (pattern_stmt))
3832 code = gimple_assign_rhs_code (pattern_stmt);
3833 else
3835 gcc_assert (is_gimple_call (pattern_stmt));
3836 code = CALL_EXPR;
3839 optab = optab_for_tree_code (code, type_in, optab_default);
3840 vec_mode = TYPE_MODE (type_in);
3841 if (!optab
3842 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
3843 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
3844 return;
3847 /* Found a vectorizable pattern. */
3848 if (dump_enabled_p ())
3850 dump_printf_loc (MSG_NOTE, vect_location,
3851 "pattern recognized: ");
3852 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3855 /* Mark the stmts that are involved in the pattern. */
3856 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
3858 /* Patterns cannot be vectorized using SLP, because they change the order of
3859 computation. */
3860 if (loop_vinfo)
3861 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
3862 if (next == stmt)
3863 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
3865 /* It is possible that additional pattern stmts are created and inserted in
3866 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3867 relevant statements. */
3868 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3869 && (unsigned) i < (stmts_to_replace->length () - 1);
3870 i++)
3872 stmt_info = vinfo_for_stmt (stmt);
3873 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3874 if (dump_enabled_p ())
3876 dump_printf_loc (MSG_NOTE, vect_location,
3877 "additional pattern stmt: ");
3878 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3881 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
3886 /* Function vect_pattern_recog
3888 Input:
3889 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3890 computation idioms.
3892 Output - for each computation idiom that is detected we create a new stmt
3893 that provides the same functionality and that can be vectorized. We
3894 also record some information in the struct_stmt_info of the relevant
3895 stmts, as explained below:
3897 At the entry to this function we have the following stmts, with the
3898 following initial value in the STMT_VINFO fields:
3900 stmt in_pattern_p related_stmt vec_stmt
3901 S1: a_i = .... - - -
3902 S2: a_2 = ..use(a_i).. - - -
3903 S3: a_1 = ..use(a_2).. - - -
3904 S4: a_0 = ..use(a_1).. - - -
3905 S5: ... = ..use(a_0).. - - -
3907 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
3908 represented by a single stmt. We then:
3909 - create a new stmt S6 equivalent to the pattern (the stmt is not
3910 inserted into the code)
3911 - fill in the STMT_VINFO fields as follows:
3913 in_pattern_p related_stmt vec_stmt
3914 S1: a_i = .... - - -
3915 S2: a_2 = ..use(a_i).. - - -
3916 S3: a_1 = ..use(a_2).. - - -
3917 S4: a_0 = ..use(a_1).. true S6 -
3918 '---> S6: a_new = .... - S4 -
3919 S5: ... = ..use(a_0).. - - -
3921 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
3922 to each other through the RELATED_STMT field).
3924 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3925 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3926 remain irrelevant unless used by stmts other than S4.
3928 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
3929 (because they are marked as irrelevant). It will vectorize S6, and record
3930 a pointer to the new vector stmt VS6 from S6 (as usual).
3931 S4 will be skipped, and S5 will be vectorized as usual:
3933 in_pattern_p related_stmt vec_stmt
3934 S1: a_i = .... - - -
3935 S2: a_2 = ..use(a_i).. - - -
3936 S3: a_1 = ..use(a_2).. - - -
3937 > VS6: va_new = .... - - -
3938 S4: a_0 = ..use(a_1).. true S6 VS6
3939 '---> S6: a_new = .... - S4 VS6
3940 > VS5: ... = ..vuse(va_new).. - - -
3941 S5: ... = ..use(a_0).. - - -
3943 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
3944 elsewhere), and we'll end up with:
3946 VS6: va_new = ....
3947 VS5: ... = ..vuse(va_new)..
3949 In case of more than one pattern statements, e.g., widen-mult with
3950 intermediate type:
3952 S1 a_t = ;
3953 S2 a_T = (TYPE) a_t;
3954 '--> S3: a_it = (interm_type) a_t;
3955 S4 prod_T = a_T * CONST;
3956 '--> S5: prod_T' = a_it w* CONST;
3958 there may be other users of a_T outside the pattern. In that case S2 will
3959 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3960 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3961 be recorded in S3. */
3963 void
3964 vect_pattern_recog (vec_info *vinfo)
3966 struct loop *loop;
3967 basic_block *bbs;
3968 unsigned int nbbs;
3969 gimple_stmt_iterator si;
3970 unsigned int i, j;
3971 vect_recog_func_ptr vect_recog_func;
3972 auto_vec<gimple *, 1> stmts_to_replace;
3973 gimple *stmt;
3975 if (dump_enabled_p ())
3976 dump_printf_loc (MSG_NOTE, vect_location,
3977 "=== vect_pattern_recog ===\n");
3979 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
3981 loop = LOOP_VINFO_LOOP (loop_vinfo);
3982 bbs = LOOP_VINFO_BBS (loop_vinfo);
3983 nbbs = loop->num_nodes;
3985 /* Scan through the loop stmts, applying the pattern recognition
3986 functions starting at each stmt visited: */
3987 for (i = 0; i < nbbs; i++)
3989 basic_block bb = bbs[i];
3990 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3992 /* Scan over all generic vect_recog_xxx_pattern functions. */
3993 for (j = 0; j < NUM_PATTERNS; j++)
3995 vect_recog_func = vect_vect_recog_func_ptrs[j];
3996 vect_pattern_recog_1 (vect_recog_func, si,
3997 &stmts_to_replace);
4002 else
4004 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4005 for (si = bb_vinfo->region_begin;
4006 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4008 if ((stmt = gsi_stmt (si))
4009 && vinfo_for_stmt (stmt)
4010 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4011 continue;
4013 /* Scan over all generic vect_recog_xxx_pattern functions. */
4014 for (j = 0; j < NUM_PATTERNS; j++)
4016 vect_recog_func = vect_vect_recog_func_ptrs[j];
4017 vect_pattern_recog_1 (vect_recog_func, si,
4018 &stmts_to_replace);