[6/n] PR85694: Add a vect_get_internal_def helper
[official-gcc.git] / gcc / tree-vect-patterns.c
blob 71b31aca21c7e2d0d10c0a8fbb9f6c26424d6620
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
50 /* Pattern recognition functions */
51 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
52 tree *);
53 static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
54 tree *);
55 static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
56 tree *);
57 static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
58 tree *);
59 static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
60 static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
61 tree *);
62 static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
63 tree *, tree *);
64 static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
65 static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
66 tree *, tree *);
67 static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
68 tree *, tree *);
70 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
71 tree *, tree *);
73 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
74 tree *, tree *);
75 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
76 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
77 static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
78 tree *);
80 struct vect_recog_func
82 vect_recog_func_ptr fn;
83 const char *name;
86 /* Note that ordering matters - the first pattern matching on a stmt
87 is taken which means usually the more complex one needs to precede
88 the less complex ones (widen_sum only after dot_prod or sad for example). */
89 static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
90 { vect_recog_widen_mult_pattern, "widen_mult" },
91 { vect_recog_dot_prod_pattern, "dot_prod" },
92 { vect_recog_sad_pattern, "sad" },
93 { vect_recog_widen_sum_pattern, "widen_sum" },
94 { vect_recog_pow_pattern, "pow" },
95 { vect_recog_widen_shift_pattern, "widen_shift" },
96 { vect_recog_over_widening_pattern, "over_widening" },
97 { vect_recog_rotate_pattern, "rotate" },
98 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
99 { vect_recog_divmod_pattern, "divmod" },
100 { vect_recog_mult_pattern, "mult" },
101 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
102 { vect_recog_bool_pattern, "bool" },
103 /* This must come before mask conversion, and includes the parts
104 of mask conversion that are needed for gather and scatter
105 internal functions. */
106 { vect_recog_gather_scatter_pattern, "gather_scatter" },
107 { vect_recog_mask_conversion_pattern, "mask_conversion" }
110 static inline void
111 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
113 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
114 stmt);
117 static inline void
118 new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
120 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
121 append_pattern_def_seq (stmt_info, stmt);
124 /* Check whether STMT2 is in the same loop or basic block as STMT1.
125 Which of the two applies depends on whether we're currently doing
126 loop-based or basic-block-based vectorization, as determined by
127 the vinfo_for_stmt for STMT1 (which must be defined).
129 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
130 to be defined as well. */
132 static bool
133 vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
135 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
136 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
139 /* If the LHS of DEF_STMT has a single use, and that statement is
140 in the same loop or basic block, return it. */
142 static gimple *
143 vect_single_imm_use (gimple *def_stmt)
145 tree lhs = gimple_assign_lhs (def_stmt);
146 use_operand_p use_p;
147 gimple *use_stmt;
149 if (!single_imm_use (lhs, &use_p, &use_stmt))
150 return NULL;
152 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
153 return NULL;
155 return use_stmt;
158 /* If OP is defined by a statement that's being considered for vectorization,
159 return information about that statement, otherwise return NULL. */
161 static stmt_vec_info
162 vect_get_internal_def (vec_info *vinfo, tree op)
164 vect_def_type dt;
165 gimple *def_stmt;
166 if (TREE_CODE (op) != SSA_NAME
167 || !vect_is_simple_use (op, vinfo, &def_stmt, &dt)
168 || dt != vect_internal_def)
169 return NULL;
171 return vinfo_for_stmt (def_stmt);
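/* An illustrative sketch (not part of the original file) of how the new
   vect_get_internal_def helper is meant to be used by the recognizers
   below: ask for the internal definition of an operand and then check its
   shape.  The function name example_get_mult_def is hypothetical.  */

static gassign *
example_get_mult_def (vec_info *vinfo, tree op)
{
  /* NULL if OP is not an SSA name defined by a statement that is being
     considered for vectorization.  */
  stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, op);
  if (!def_vinfo)
    return NULL;

  /* Only accept a plain multiplication, mirroring the dot-product
     recognizer below.  */
  gassign *def = dyn_cast <gassign *> (def_vinfo->stmt);
  if (!def || gimple_assign_rhs_code (def) != MULT_EXPR)
    return NULL;
  return def;
}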
174 /* Check whether NAME, an ssa-name used in USE_STMT,
175 is a result of a type promotion, such that:
176 DEF_STMT: NAME = NOP (name0)
177 If CHECK_SIGN is TRUE, check that either both types are signed or both are
178 unsigned. */
180 static bool
181 type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
182 tree *orig_type, gimple **def_stmt, bool *promotion)
184 gimple *dummy_gimple;
185 stmt_vec_info stmt_vinfo;
186 tree type = TREE_TYPE (name);
187 tree oprnd0;
188 enum vect_def_type dt;
190 stmt_vinfo = vinfo_for_stmt (use_stmt);
191 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
192 return false;
194 if (dt != vect_internal_def
195 && dt != vect_external_def && dt != vect_constant_def)
196 return false;
198 if (!*def_stmt)
199 return false;
201 if (dt == vect_internal_def)
203 stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
204 if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
205 return false;
208 if (!is_gimple_assign (*def_stmt))
209 return false;
211 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
212 return false;
214 oprnd0 = gimple_assign_rhs1 (*def_stmt);
216 *orig_type = TREE_TYPE (oprnd0);
217 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
218 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
219 return false;
221 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
222 *promotion = true;
223 else
224 *promotion = false;
226 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
227 return false;
229 return true;
232 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
233 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
235 static tree
236 vect_recog_temp_ssa_var (tree type, gimple *stmt)
238 return make_temp_ssa_name (type, stmt, "patt");
241 /* Return true if STMT_VINFO describes a reduction for which reassociation
242 is allowed. If STMT_VINFO is part of a group, assume that it's part of
243 a reduction chain and optimistically assume that all statements
244 except the last allow reassociation. */
246 static bool
247 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
249 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
250 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
251 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
254 /* Function vect_recog_dot_prod_pattern
256 Try to find the following pattern:
258 type x_t, y_t;
259 TYPE1 prod;
260 TYPE2 sum = init;
261 loop:
262 sum_0 = phi <init, sum_1>
263 S1 x_t = ...
264 S2 y_t = ...
265 S3 x_T = (TYPE1) x_t;
266 S4 y_T = (TYPE1) y_t;
267 S5 prod = x_T * y_T;
268 [S6 prod = (TYPE2) prod; #optional]
269 S7 sum_1 = prod + sum_0;
271 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
272 same size as 'TYPE1' or bigger. This is a special case of a reduction
273 computation.
275 Input:
277 * STMTS: Contains a stmt from which the pattern search begins. In the
278 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
279 will be detected.
281 Output:
283 * TYPE_IN: The type of the input arguments to the pattern.
285 * TYPE_OUT: The type of the output of this pattern.
287 * Return value: A new stmt that will be used to replace the sequence of
288 stmts that constitute the pattern. In this case it will be:
289 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
291 Note: The dot-prod idiom is a widening reduction pattern that is
292 vectorized without preserving all the intermediate results. It
293 produces only N/2 (widened) results (by summing up pairs of
294 intermediate results) rather than all N results. Therefore, we
295 cannot allow this pattern when we want to get all the results and in
296 the correct order (as is the case when this computation is in an
297 inner-loop nested in an outer-loop that is being vectorized). */
299 static gimple *
300 vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
301 tree *type_out)
303 gimple *stmt, *last_stmt = (*stmts)[0];
304 tree oprnd0, oprnd1;
305 tree oprnd00, oprnd01;
306 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
307 vec_info *vinfo = stmt_vinfo->vinfo;
308 tree type, half_type;
309 gimple *pattern_stmt;
310 tree prod_type;
311 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
312 struct loop *loop;
313 tree var;
314 bool promotion;
316 if (!loop_info)
317 return NULL;
319 loop = LOOP_VINFO_LOOP (loop_info);
321 /* We don't allow changing the order of the computation in the inner-loop
322 when doing outer-loop vectorization. */
323 if (loop && nested_in_vect_loop_p (loop, last_stmt))
324 return NULL;
326 if (!is_gimple_assign (last_stmt))
327 return NULL;
329 type = gimple_expr_type (last_stmt);
331 /* Look for the following pattern
332 DX = (TYPE1) X;
333 DY = (TYPE1) Y;
334 DPROD = DX * DY;
335 DDPROD = (TYPE2) DPROD;
336 sum_1 = DDPROD + sum_0;
337 In which
338 - DX is double the size of X
339 - DY is double the size of Y
340 - DX, DY, DPROD all have the same type
341 - sum is the same size as DPROD or bigger
342 - sum has been recognized as a reduction variable.
344 This is equivalent to:
345 DPROD = X w* Y; #widen mult
346 sum_1 = DPROD w+ sum_0; #widen summation
348 DPROD = X w* Y; #widen mult
349 sum_1 = DPROD + sum_0; #summation
352 /* Starting from LAST_STMT, follow the defs of its uses in search
353 of the above pattern. */
355 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
356 return NULL;
358 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
359 return NULL;
361 if (!vect_reassociating_reduction_p (stmt_vinfo))
362 return NULL;
364 oprnd0 = gimple_assign_rhs1 (last_stmt);
365 oprnd1 = gimple_assign_rhs2 (last_stmt);
366 stmt = last_stmt;
368 gimple *def_stmt;
369 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
370 &promotion)
371 && promotion)
373 stmt = def_stmt;
374 oprnd0 = gimple_assign_rhs1 (stmt);
376 else
377 half_type = type;
379 /* So far so good. Since last_stmt was detected as a (summation) reduction,
380 we know that oprnd1 is the reduction variable (defined by a loop-header
381 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
382 Left to check that oprnd0 is defined by a (widen_)mult_expr */
383 if (TREE_CODE (oprnd0) != SSA_NAME)
384 return NULL;
386 prod_type = half_type;
387 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
388 if (!mult_vinfo)
389 return NULL;
391 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
392 inside the loop (in case we are analyzing an outer-loop). */
393 gassign *mult = dyn_cast <gassign *> (mult_vinfo->stmt);
394 if (!mult || gimple_assign_rhs_code (mult) != MULT_EXPR)
395 return NULL;
396 if (STMT_VINFO_IN_PATTERN_P (mult_vinfo))
398 /* Has been detected as a widening multiplication? */
400 mult = dyn_cast <gassign *> (STMT_VINFO_RELATED_STMT (mult_vinfo));
401 if (!mult || gimple_assign_rhs_code (mult) != WIDEN_MULT_EXPR)
402 return NULL;
403 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)
404 = STMT_VINFO_PATTERN_DEF_SEQ (mult_vinfo);
405 mult_vinfo = vinfo_for_stmt (mult);
406 gcc_assert (mult_vinfo);
407 gcc_assert (STMT_VINFO_DEF_TYPE (mult_vinfo) == vect_internal_def);
408 oprnd00 = gimple_assign_rhs1 (mult);
409 oprnd01 = gimple_assign_rhs2 (mult);
411 else
413 tree half_type0, half_type1;
414 gimple *def_stmt;
415 tree oprnd0, oprnd1;
417 oprnd0 = gimple_assign_rhs1 (mult);
418 oprnd1 = gimple_assign_rhs2 (mult);
419 if (!type_conversion_p (oprnd0, mult, true, &half_type0, &def_stmt,
420 &promotion)
421 || !promotion)
422 return NULL;
423 oprnd00 = gimple_assign_rhs1 (def_stmt);
424 if (!type_conversion_p (oprnd1, mult, true, &half_type1, &def_stmt,
425 &promotion)
426 || !promotion)
427 return NULL;
428 oprnd01 = gimple_assign_rhs1 (def_stmt);
429 if (!types_compatible_p (half_type0, half_type1))
430 return NULL;
431 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
432 return NULL;
435 half_type = TREE_TYPE (oprnd00);
436 *type_in = half_type;
437 *type_out = type;
439 /* Pattern detected. Create a stmt to be used to replace the pattern: */
440 var = vect_recog_temp_ssa_var (type, NULL);
441 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
442 oprnd00, oprnd01, oprnd1);
444 if (dump_enabled_p ())
446 dump_printf_loc (MSG_NOTE, vect_location,
447 "vect_recog_dot_prod_pattern: detected: ");
448 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
451 return pattern_stmt;
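/* An illustrative scalar source loop (not part of this file) with the
   S1-S7 shape described in the comment above, assuming 'type' is signed
   char, 'TYPE1' is short and 'TYPE2' is int: the multiply-and-accumulate
   is what vect_recog_dot_prod_pattern replaces with DOT_PROD_EXPR.  */

static int
example_dot_prod (signed char *x, signed char *y, int n)
{
  int sum = 0;                       /* TYPE2 accumulator (sum_0/sum_1).  */
  for (int i = 0; i < n; i++)
    {
      short prod = x[i] * y[i];      /* S3-S5: widen both inputs, multiply.  */
      sum += prod;                   /* S7: reduction add.  */
    }
  return sum;
}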
455 /* Function vect_recog_sad_pattern
457 Try to find the following Sum of Absolute Difference (SAD) pattern:
459 type x_t, y_t;
460 signed TYPE1 diff, abs_diff;
461 TYPE2 sum = init;
462 loop:
463 sum_0 = phi <init, sum_1>
464 S1 x_t = ...
465 S2 y_t = ...
466 S3 x_T = (TYPE1) x_t;
467 S4 y_T = (TYPE1) y_t;
468 S5 diff = x_T - y_T;
469 S6 abs_diff = ABS_EXPR <diff>;
470 [S7 abs_diff = (TYPE2) abs_diff; #optional]
471 S8 sum_1 = abs_diff + sum_0;
473 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
474 same size as 'TYPE1' or bigger. This is a special case of a reduction
475 computation.
477 Input:
479 * STMTS: Contains a stmt from which the pattern search begins. In the
480 example, when this function is called with S8, the pattern
481 {S3,S4,S5,S6,S7,S8} will be detected.
483 Output:
485 * TYPE_IN: The type of the input arguments to the pattern.
487 * TYPE_OUT: The type of the output of this pattern.
489 * Return value: A new stmt that will be used to replace the sequence of
490 stmts that constitute the pattern. In this case it will be:
491 SAD_EXPR <x_t, y_t, sum_0>
494 static gimple *
495 vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
496 tree *type_out)
498 gimple *last_stmt = (*stmts)[0];
499 tree sad_oprnd0, sad_oprnd1;
500 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
501 vec_info *vinfo = stmt_vinfo->vinfo;
502 tree half_type;
503 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
504 struct loop *loop;
505 bool promotion;
507 if (!loop_info)
508 return NULL;
510 loop = LOOP_VINFO_LOOP (loop_info);
512 /* We don't allow changing the order of the computation in the inner-loop
513 when doing outer-loop vectorization. */
514 if (loop && nested_in_vect_loop_p (loop, last_stmt))
515 return NULL;
517 if (!is_gimple_assign (last_stmt))
518 return NULL;
520 tree sum_type = gimple_expr_type (last_stmt);
522 /* Look for the following pattern
523 DX = (TYPE1) X;
524 DY = (TYPE1) Y;
525 DDIFF = DX - DY;
526 DAD = ABS_EXPR <DDIFF>;
527 DDPROD = (TYPE2) DPROD;
528 sum_1 = DAD + sum_0;
529 In which
530 - DX is at least double the size of X
531 - DY is at least double the size of Y
532 - DX, DY, DDIFF, DAD all have the same type
533 - sum is the same size as DAD or bigger
534 - sum has been recognized as a reduction variable.
536 This is equivalent to:
537 DDIFF = X w- Y; #widen sub
538 DAD = ABS_EXPR <DDIFF>;
539 sum_1 = DAD w+ sum_0; #widen summation
541 DDIFF = X w- Y; #widen sub
542 DAD = ABS_EXPR <DDIFF>;
543 sum_1 = DAD + sum_0; #summation
546 /* Starting from LAST_STMT, follow the defs of its uses in search
547 of the above pattern. */
549 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
550 return NULL;
552 tree plus_oprnd0, plus_oprnd1;
554 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
555 return NULL;
557 if (!vect_reassociating_reduction_p (stmt_vinfo))
558 return NULL;
560 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
561 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
563 /* The type conversion could be promotion, demotion,
564 or just signed -> unsigned. */
565 gimple *def_stmt;
566 if (type_conversion_p (plus_oprnd0, last_stmt, false,
567 &half_type, &def_stmt, &promotion))
568 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
569 else
570 half_type = sum_type;
572 /* So far so good. Since last_stmt was detected as a (summation) reduction,
573 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
574 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
575 Then check that plus_oprnd0 is defined by an abs_expr. */
577 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
578 return NULL;
580 tree abs_type = half_type;
581 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
582 if (!abs_stmt_vinfo)
583 return NULL;
585 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
586 inside the loop (in case we are analyzing an outer-loop). */
587 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
588 if (!abs_stmt
589 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
590 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
591 return NULL;
593 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
594 if (TYPE_UNSIGNED (abs_type))
595 return NULL;
597 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
599 if (TREE_CODE (abs_oprnd) != SSA_NAME)
600 return NULL;
602 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
603 if (!diff_stmt_vinfo)
604 return NULL;
606 /* FORNOW. Can continue analyzing the def-use chain when this stmt is a phi
607 inside the loop (in case we are analyzing an outer-loop). */
608 gassign *diff_stmt = dyn_cast <gassign *> (diff_stmt_vinfo->stmt);
609 if (!diff_stmt || gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
610 return NULL;
612 tree half_type0, half_type1;
614 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
615 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
617 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
618 &half_type0, &def_stmt, &promotion)
619 || !promotion)
620 return NULL;
621 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
623 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
624 &half_type1, &def_stmt, &promotion)
625 || !promotion)
626 return NULL;
627 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
629 if (!types_compatible_p (half_type0, half_type1))
630 return NULL;
631 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
632 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
633 return NULL;
635 *type_in = TREE_TYPE (sad_oprnd0);
636 *type_out = sum_type;
638 /* Pattern detected. Create a stmt to be used to replace the pattern: */
639 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
640 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
641 sad_oprnd1, plus_oprnd1);
643 if (dump_enabled_p ())
645 dump_printf_loc (MSG_NOTE, vect_location,
646 "vect_recog_sad_pattern: detected: ");
647 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
650 return pattern_stmt;
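/* An illustrative scalar source loop (not part of this file) with the
   shape described above, assuming unsigned char inputs subtracted in a
   wider signed type and the absolute difference accumulated into an int;
   this is the sum-of-absolute-differences idiom the recognizer turns into
   SAD_EXPR.  */

static int
example_sad (unsigned char *x, unsigned char *y, int n)
{
  int sum = 0;                            /* TYPE2 accumulator.  */
  for (int i = 0; i < n; i++)
    sum += __builtin_abs (x[i] - y[i]);   /* S3-S8: widen, subtract, abs, add.  */
  return sum;
}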
654 /* Handle widening operation by a constant. At the moment we support MULT_EXPR
655 and LSHIFT_EXPR.
657 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
658 we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
660 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
661 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
662 that satisfies the above restrictions, we can perform a widening operation
663 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
664 with a_it = (interm_type) a_t; Store such an operation in *WSTMT. */
666 static bool
667 vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
668 tree const_oprnd, tree *oprnd,
669 gimple **wstmt, tree type,
670 tree *half_type, gimple *def_stmt)
672 tree new_type, new_oprnd;
674 if (code != MULT_EXPR && code != LSHIFT_EXPR)
675 return false;
677 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
678 || (code == LSHIFT_EXPR
679 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
680 != 1))
681 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
683 /* CONST_OPRND is a constant of HALF_TYPE. */
684 *oprnd = gimple_assign_rhs1 (def_stmt);
685 return true;
688 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
689 return false;
691 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
692 return false;
694 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
695 a type 2 times bigger than HALF_TYPE. */
696 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
697 TYPE_UNSIGNED (type));
698 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
699 || (code == LSHIFT_EXPR
700 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
701 return false;
703 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
704 *oprnd = gimple_assign_rhs1 (def_stmt);
705 new_oprnd = make_ssa_name (new_type);
706 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
707 *oprnd = new_oprnd;
709 *half_type = new_type;
710 return true;
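/* An illustrative scalar fragment (not part of this file) of the
   intermediate-type case handled above, assuming 8-bit char, 16-bit short
   and 32-bit int: TYPE (int) is four times wider than HALF_TYPE (char),
   and the constant 300 fits short but not signed char, so the promotion
   can be replaced by char -> short and a widening multiply on short.  */

static void
example_widen_by_const (signed char *restrict a, int *restrict out, int n)
{
  for (int i = 0; i < n; i++)
    {
      int a_T = a[i];          /* a_T = (TYPE) a_t; rewritten to (short) a_t.  */
      out[i] = a_T * 300;      /* Becomes a widening multiply by 300.  */
    }
}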
714 /* Function vect_recog_widen_mult_pattern
716 Try to find the following pattern:
718 type1 a_t;
719 type2 b_t;
720 TYPE a_T, b_T, prod_T;
722 S1 a_t = ;
723 S2 b_t = ;
724 S3 a_T = (TYPE) a_t;
725 S4 b_T = (TYPE) b_t;
726 S5 prod_T = a_T * b_T;
728 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
730 Also detect unsigned cases:
732 unsigned type1 a_t;
733 unsigned type2 b_t;
734 unsigned TYPE u_prod_T;
735 TYPE a_T, b_T, prod_T;
737 S1 a_t = ;
738 S2 b_t = ;
739 S3 a_T = (TYPE) a_t;
740 S4 b_T = (TYPE) b_t;
741 S5 prod_T = a_T * b_T;
742 S6 u_prod_T = (unsigned TYPE) prod_T;
744 and multiplication by constants:
746 type a_t;
747 TYPE a_T, prod_T;
749 S1 a_t = ;
750 S3 a_T = (TYPE) a_t;
751 S5 prod_T = a_T * CONST;
753 A special case of multiplication by constants is when 'TYPE' is 4 times
754 bigger than 'type', but CONST fits an intermediate type 2 times smaller
755 than 'TYPE'. In that case we create an additional pattern stmt for S3
756 to create a variable of the intermediate type, and perform widen-mult
757 on the intermediate type as well:
759 type a_t;
760 interm_type a_it;
761 TYPE a_T, prod_T, prod_T';
763 S1 a_t = ;
764 S3 a_T = (TYPE) a_t;
765 '--> a_it = (interm_type) a_t;
766 S5 prod_T = a_T * CONST;
767 '--> prod_T' = a_it w* CONST;
769 Input/Output:
771 * STMTS: Contains a stmt from which the pattern search begins. In the
772 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
773 is detected. In case of unsigned widen-mult, the original stmt (S5) is
774 replaced with S6 in STMTS. In case of multiplication by a constant
775 of an intermediate type (the last case above), STMTS also contains S3
776 (inserted before S5).
778 Output:
780 * TYPE_IN: The type of the input arguments to the pattern.
782 * TYPE_OUT: The type of the output of this pattern.
784 * Return value: A new stmt that will be used to replace the sequence of
785 stmts that constitute the pattern. In this case it will be:
786 WIDEN_MULT <a_t, b_t>
787 If the result of WIDEN_MULT needs to be converted to a larger type, the
788 returned stmt will be this type conversion stmt.
791 static gimple *
792 vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
793 tree *type_in, tree *type_out)
795 gimple *last_stmt = stmts->pop ();
796 gimple *def_stmt0, *def_stmt1;
797 tree oprnd0, oprnd1;
798 tree type, half_type0, half_type1;
799 gimple *new_stmt = NULL, *pattern_stmt = NULL;
800 tree vectype, vecitype;
801 tree var;
802 enum tree_code dummy_code;
803 int dummy_int;
804 vec<tree> dummy_vec;
805 bool op1_ok;
806 bool promotion;
808 if (!is_gimple_assign (last_stmt))
809 return NULL;
811 type = gimple_expr_type (last_stmt);
813 /* Starting from LAST_STMT, follow the defs of its uses in search
814 of the above pattern. */
816 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
817 return NULL;
819 oprnd0 = gimple_assign_rhs1 (last_stmt);
820 oprnd1 = gimple_assign_rhs2 (last_stmt);
822 /* Check argument 0. */
823 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
824 &promotion)
825 || !promotion)
826 return NULL;
827 /* Check argument 1. */
828 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
829 &def_stmt1, &promotion);
831 if (op1_ok && promotion)
833 oprnd0 = gimple_assign_rhs1 (def_stmt0);
834 oprnd1 = gimple_assign_rhs1 (def_stmt1);
836 else
838 if (TREE_CODE (oprnd1) == INTEGER_CST
839 && TREE_CODE (half_type0) == INTEGER_TYPE
840 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
841 &oprnd0, &new_stmt, type,
842 &half_type0, def_stmt0))
844 half_type1 = half_type0;
845 oprnd1 = fold_convert (half_type1, oprnd1);
847 else
848 return NULL;
851 /* If the two arguments have different sizes, convert the one with
852 the smaller type into the larger type. */
853 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
855 /* If we already used up the single-stmt slot give up. */
856 if (new_stmt)
857 return NULL;
859 tree* oprnd = NULL;
860 gimple *def_stmt = NULL;
862 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
864 def_stmt = def_stmt0;
865 half_type0 = half_type1;
866 oprnd = &oprnd0;
868 else
870 def_stmt = def_stmt1;
871 half_type1 = half_type0;
872 oprnd = &oprnd1;
875 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
876 tree new_oprnd = make_ssa_name (half_type0);
877 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
878 *oprnd = new_oprnd;
881 /* Handle unsigned case. Look for
882 S6 u_prod_T = (unsigned TYPE) prod_T;
883 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
884 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
886 gimple *use_stmt;
887 tree use_lhs;
888 tree use_type;
890 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
891 return NULL;
893 use_stmt = vect_single_imm_use (last_stmt);
894 if (!use_stmt || !is_gimple_assign (use_stmt)
895 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
896 return NULL;
898 use_lhs = gimple_assign_lhs (use_stmt);
899 use_type = TREE_TYPE (use_lhs);
900 if (!INTEGRAL_TYPE_P (use_type)
901 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
902 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
903 return NULL;
905 type = use_type;
906 last_stmt = use_stmt;
909 if (!types_compatible_p (half_type0, half_type1))
910 return NULL;
912 /* If TYPE is more than twice the size of HALF_TYPE, we use WIDEN_MULT
913 to get an intermediate result of type ITYPE. In this case we need
914 to build a statement to convert this intermediate result to type TYPE. */
915 tree itype = type;
916 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
917 itype = build_nonstandard_integer_type
918 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
919 TYPE_UNSIGNED (type));
921 /* Pattern detected. */
922 if (dump_enabled_p ())
923 dump_printf_loc (MSG_NOTE, vect_location,
924 "vect_recog_widen_mult_pattern: detected:\n");
926 /* Check target support */
927 vectype = get_vectype_for_scalar_type (half_type0);
928 vecitype = get_vectype_for_scalar_type (itype);
929 if (!vectype
930 || !vecitype
931 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
932 vecitype, vectype,
933 &dummy_code, &dummy_code,
934 &dummy_int, &dummy_vec))
935 return NULL;
937 *type_in = vectype;
938 *type_out = get_vectype_for_scalar_type (type);
940 /* Pattern supported. Create a stmt to be used to replace the pattern: */
941 var = vect_recog_temp_ssa_var (itype, NULL);
942 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
944 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
945 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
947 /* If the original two operands have different sizes, we may need to convert
948 the smaller one into the larger type. If this is the case, at this point
949 the new stmt is already built. */
950 if (new_stmt)
952 append_pattern_def_seq (stmt_vinfo, new_stmt);
953 stmt_vec_info new_stmt_info
954 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
955 set_vinfo_for_stmt (new_stmt, new_stmt_info);
956 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
959 /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
960 the result of the widen-mult operation into type TYPE. */
961 if (itype != type)
963 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
964 stmt_vec_info pattern_stmt_info
965 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
966 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
967 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
968 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
969 NOP_EXPR,
970 gimple_assign_lhs (pattern_stmt));
973 if (dump_enabled_p ())
974 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
976 stmts->safe_push (last_stmt);
977 return pattern_stmt;
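/* An illustrative scalar source loop (not part of this file) with the
   S1-S5 shape described above, assuming 'type1'/'type2' are short and
   'TYPE' is int: the multiply becomes a WIDEN_MULT on the narrow inputs.  */

static void
example_widen_mult (short *restrict a, short *restrict b,
		    int *restrict c, int n)
{
  for (int i = 0; i < n; i++)
    {
      int a_T = a[i];          /* S3 */
      int b_T = b[i];          /* S4 */
      c[i] = a_T * b_T;        /* S5: WIDEN_MULT <a_t, b_t>.  */
    }
}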
981 /* Function vect_recog_pow_pattern
983 Try to find the following pattern:
985 x = POW (y, N);
987 with POW being one of pow, powf, powi, powif and N being
988 either 2 or 0.5.
990 Input:
992 * LAST_STMT: A stmt from which the pattern search begins.
994 Output:
996 * TYPE_IN: The type of the input arguments to the pattern.
998 * TYPE_OUT: The type of the output of this pattern.
1000 * Return value: A new stmt that will be used to replace the sequence of
1001 stmts that constitute the pattern. In this case it will be:
1002 x = x * x
1004 x = sqrt (x)
1007 static gimple *
1008 vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
1009 tree *type_out)
1011 gimple *last_stmt = (*stmts)[0];
1012 tree base, exp;
1013 gimple *stmt;
1014 tree var;
1016 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1017 return NULL;
1019 switch (gimple_call_combined_fn (last_stmt))
1021 CASE_CFN_POW:
1022 CASE_CFN_POWI:
1023 break;
1025 default:
1026 return NULL;
1029 base = gimple_call_arg (last_stmt, 0);
1030 exp = gimple_call_arg (last_stmt, 1);
1031 if (TREE_CODE (exp) != REAL_CST
1032 && TREE_CODE (exp) != INTEGER_CST)
1034 if (flag_unsafe_math_optimizations
1035 && TREE_CODE (base) == REAL_CST
1036 && !gimple_call_internal_p (last_stmt))
1038 combined_fn log_cfn;
1039 built_in_function exp_bfn;
1040 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1042 case BUILT_IN_POW:
1043 log_cfn = CFN_BUILT_IN_LOG;
1044 exp_bfn = BUILT_IN_EXP;
1045 break;
1046 case BUILT_IN_POWF:
1047 log_cfn = CFN_BUILT_IN_LOGF;
1048 exp_bfn = BUILT_IN_EXPF;
1049 break;
1050 case BUILT_IN_POWL:
1051 log_cfn = CFN_BUILT_IN_LOGL;
1052 exp_bfn = BUILT_IN_EXPL;
1053 break;
1054 default:
1055 return NULL;
1057 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1058 tree exp_decl = builtin_decl_implicit (exp_bfn);
1059 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1060 does that, but if C is a power of 2, we want to use
1061 exp2 (log2 (C) * x) in the non-vectorized version, but for
1062 vectorization we don't have vectorized exp2. */
1063 if (logc
1064 && TREE_CODE (logc) == REAL_CST
1065 && exp_decl
1066 && lookup_attribute ("omp declare simd",
1067 DECL_ATTRIBUTES (exp_decl)))
1069 cgraph_node *node = cgraph_node::get_create (exp_decl);
1070 if (node->simd_clones == NULL)
1072 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1073 || node->definition)
1074 return NULL;
1075 expand_simd_clones (node);
1076 if (node->simd_clones == NULL)
1077 return NULL;
1079 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1080 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1081 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1082 new_pattern_def_seq (stmt_vinfo, g);
1083 *type_in = TREE_TYPE (base);
1084 *type_out = NULL_TREE;
1085 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1086 g = gimple_build_call (exp_decl, 1, def);
1087 gimple_call_set_lhs (g, res);
1088 return g;
1092 return NULL;
1095 /* We now have a pow or powi builtin function call with a constant
1096 exponent. */
1098 *type_out = NULL_TREE;
1100 /* Catch squaring. */
1101 if ((tree_fits_shwi_p (exp)
1102 && tree_to_shwi (exp) == 2)
1103 || (TREE_CODE (exp) == REAL_CST
1104 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1106 *type_in = TREE_TYPE (base);
1108 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1109 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1110 return stmt;
1113 /* Catch square root. */
1114 if (TREE_CODE (exp) == REAL_CST
1115 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1117 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1118 if (*type_in
1119 && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
1120 OPTIMIZE_FOR_SPEED))
1122 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1123 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1124 gimple_call_set_lhs (stmt, var);
1125 gimple_call_set_nothrow (stmt, true);
1126 return stmt;
1130 return NULL;
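/* Illustrative scalar examples (not part of this file) of the two constant
   exponents handled above: exponent 2.0 is rewritten as x * x, and 0.5 as
   a square root, the latter only if IFN_SQRT is supported as checked
   above.  __builtin_pow is used so the sketches need no extra header.  */

static double
example_pow_square (double x)
{
  return __builtin_pow (x, 2.0);    /* Becomes x * x.  */
}

static double
example_pow_sqrt (double x)
{
  return __builtin_pow (x, 0.5);    /* Becomes sqrt (x).  */
}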
1134 /* Function vect_recog_widen_sum_pattern
1136 Try to find the following pattern:
1138 type x_t;
1139 TYPE x_T, sum = init;
1140 loop:
1141 sum_0 = phi <init, sum_1>
1142 S1 x_t = *p;
1143 S2 x_T = (TYPE) x_t;
1144 S3 sum_1 = x_T + sum_0;
1146 where type 'TYPE' is at least double the size of type 'type', i.e., we're
1147 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1148 a special case of a reduction computation.
1150 Input:
1152 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1153 when this function is called with S3, the pattern {S2,S3} will be detected.
1155 Output:
1157 * TYPE_IN: The type of the input arguments to the pattern.
1159 * TYPE_OUT: The type of the output of this pattern.
1161 * Return value: A new stmt that will be used to replace the sequence of
1162 stmts that constitute the pattern. In this case it will be:
1163 WIDEN_SUM <x_t, sum_0>
1165 Note: The widening-sum idiom is a widening reduction pattern that is
1166 vectorized without preserving all the intermediate results. It
1167 produces only N/2 (widened) results (by summing up pairs of
1168 intermediate results) rather than all N results. Therefore, we
1169 cannot allow this pattern when we want to get all the results and in
1170 the correct order (as is the case when this computation is in an
1171 inner-loop nested in an outer-loop that is being vectorized). */
1173 static gimple *
1174 vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1175 tree *type_out)
1177 gimple *stmt, *last_stmt = (*stmts)[0];
1178 tree oprnd0, oprnd1;
1179 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1180 tree type, half_type;
1181 gimple *pattern_stmt;
1182 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1183 struct loop *loop;
1184 tree var;
1185 bool promotion;
1187 if (!loop_info)
1188 return NULL;
1190 loop = LOOP_VINFO_LOOP (loop_info);
1192 /* We don't allow changing the order of the computation in the inner-loop
1193 when doing outer-loop vectorization. */
1194 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1195 return NULL;
1197 if (!is_gimple_assign (last_stmt))
1198 return NULL;
1200 type = gimple_expr_type (last_stmt);
1202 /* Look for the following pattern
1203 DX = (TYPE) X;
1204 sum_1 = DX + sum_0;
1205 In which DX is at least double the size of X, and sum_1 has been
1206 recognized as a reduction variable.
1209 /* Starting from LAST_STMT, follow the defs of its uses in search
1210 of the above pattern. */
1212 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1213 return NULL;
1215 if (!vect_reassociating_reduction_p (stmt_vinfo))
1216 return NULL;
1218 oprnd0 = gimple_assign_rhs1 (last_stmt);
1219 oprnd1 = gimple_assign_rhs2 (last_stmt);
1221 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1222 we know that oprnd1 is the reduction variable (defined by a loop-header
1223 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1224 Left to check that oprnd0 is defined by a cast from type 'type' to type
1225 'TYPE'. */
1227 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1228 &promotion)
1229 || !promotion)
1230 return NULL;
1232 oprnd0 = gimple_assign_rhs1 (stmt);
1233 *type_in = half_type;
1234 *type_out = type;
1236 /* Pattern detected. Create a stmt to be used to replace the pattern: */
1237 var = vect_recog_temp_ssa_var (type, NULL);
1238 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1240 if (dump_enabled_p ())
1242 dump_printf_loc (MSG_NOTE, vect_location,
1243 "vect_recog_widen_sum_pattern: detected: ");
1244 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1247 return pattern_stmt;
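/* An illustrative scalar source loop (not part of this file) with the
   S1-S3 shape described above, assuming 'type' is short and 'TYPE' is int:
   the accumulation becomes WIDEN_SUM <x_t, sum_0>.  */

static int
example_widen_sum (short *x, int n)
{
  int sum = 0;                 /* TYPE accumulator.  */
  for (int i = 0; i < n; i++)
    sum += x[i];               /* S2-S3: widen and add.  */
  return sum;
}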
1251 /* Return TRUE if the operation in STMT can be performed on a smaller type.
1253 Input:
1254 STMT - a statement to check.
1255 DEF - we support operations with two operands, one of which is constant.
1256 The other operand can be defined by a demotion operation, or by a
1257 previous statement in a sequence of over-promoted operations. In the
1258 latter case DEF is used to replace that operand. (It is defined by a
1259 pattern statement we created for the previous statement in the
1260 sequence).
1262 Input/output:
1263 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1264 NULL, it's the type of DEF.
1265 STMTS - additional pattern statements. If a pattern statement (type
1266 conversion) is created in this function, its original statement is
1267 added to STMTS.
1269 Output:
1270 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1271 operands to use in the new pattern statement for STMT (will be created
1272 in vect_recog_over_widening_pattern ()).
1273 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1274 statements for STMT: the first one is a type promotion and the second
1275 one is the operation itself. We return the type promotion statement
1276 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1277 the second pattern statement. */
1279 static bool
1280 vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1281 tree *op0, tree *op1, gimple **new_def_stmt,
1282 vec<gimple *> *stmts)
1284 enum tree_code code;
1285 tree const_oprnd, oprnd;
1286 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1287 gimple *def_stmt, *new_stmt;
1288 bool first = false;
1289 bool promotion;
1291 *op0 = NULL_TREE;
1292 *op1 = NULL_TREE;
1293 *new_def_stmt = NULL;
1295 if (!is_gimple_assign (stmt))
1296 return false;
1298 code = gimple_assign_rhs_code (stmt);
1299 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1300 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1301 return false;
1303 oprnd = gimple_assign_rhs1 (stmt);
1304 const_oprnd = gimple_assign_rhs2 (stmt);
1305 type = gimple_expr_type (stmt);
1307 if (TREE_CODE (oprnd) != SSA_NAME
1308 || TREE_CODE (const_oprnd) != INTEGER_CST)
1309 return false;
1311 /* If oprnd has other uses besides that in stmt we cannot mark it
1312 as being part of a pattern only. */
1313 if (!has_single_use (oprnd))
1314 return false;
1316 /* If we are in the middle of a sequence, we use DEF from a previous
1317 statement. Otherwise, OPRND has to be a result of type promotion. */
1318 if (*new_type)
1320 half_type = *new_type;
1321 oprnd = def;
1323 else
1325 first = true;
1326 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1327 &promotion)
1328 || !promotion
1329 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1330 return false;
1333 /* Can we perform the operation on a smaller type? */
1334 switch (code)
1336 case BIT_IOR_EXPR:
1337 case BIT_XOR_EXPR:
1338 case BIT_AND_EXPR:
1339 if (!int_fits_type_p (const_oprnd, half_type))
1341 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1342 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1343 return false;
1345 interm_type = build_nonstandard_integer_type (
1346 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1347 if (!int_fits_type_p (const_oprnd, interm_type))
1348 return false;
1351 break;
1353 case LSHIFT_EXPR:
1354 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1355 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1356 return false;
1358 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1359 (e.g., if the original value was char, the shift amount is at most 8
1360 if we want to use short). */
1361 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1362 return false;
1364 interm_type = build_nonstandard_integer_type (
1365 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1367 if (!vect_supportable_shift (code, interm_type))
1368 return false;
1370 break;
1372 case RSHIFT_EXPR:
1373 if (vect_supportable_shift (code, half_type))
1374 break;
1376 /* Try intermediate type - HALF_TYPE is not supported. */
1377 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1378 return false;
1380 interm_type = build_nonstandard_integer_type (
1381 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1383 if (!vect_supportable_shift (code, interm_type))
1384 return false;
1386 break;
1388 default:
1389 gcc_unreachable ();
1392 /* There are four possible cases:
1393 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1394 the first statement in the sequence)
1395 a. The original, HALF_TYPE, is not enough - we replace the promotion
1396 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1397 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1398 promotion.
1399 2. OPRND is defined by a pattern statement we created.
1400 a. Its type is not sufficient for the operation, we create a new stmt:
1401 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1402 this statement in NEW_DEF_STMT, and it is later put in
1403 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1404 b. OPRND is good to use in the new statement. */
1405 if (first)
1407 if (interm_type)
1409 /* Replace the original type conversion HALF_TYPE->TYPE with
1410 HALF_TYPE->INTERM_TYPE. */
1411 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1413 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1414 /* Check if the already created pattern stmt is what we need. */
1415 if (!is_gimple_assign (new_stmt)
1416 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1417 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1418 return false;
1420 stmts->safe_push (def_stmt);
1421 oprnd = gimple_assign_lhs (new_stmt);
1423 else
1425 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1426 oprnd = gimple_assign_rhs1 (def_stmt);
1427 new_oprnd = make_ssa_name (interm_type);
1428 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1429 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1430 stmts->safe_push (def_stmt);
1431 oprnd = new_oprnd;
1434 else
1436 /* Retrieve the operand before the type promotion. */
1437 oprnd = gimple_assign_rhs1 (def_stmt);
1440 else
1442 if (interm_type)
1444 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1445 new_oprnd = make_ssa_name (interm_type);
1446 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1447 oprnd = new_oprnd;
1448 *new_def_stmt = new_stmt;
1451 /* Otherwise, OPRND is already set. */
1454 if (interm_type)
1455 *new_type = interm_type;
1456 else
1457 *new_type = half_type;
1459 *op0 = oprnd;
1460 *op1 = fold_convert (*new_type, const_oprnd);
1462 return true;
1466 /* Try to find a statement or a sequence of statements that can be performed
1467 on a smaller type:
1469 type x_t;
1470 TYPE x_T, res0_T, res1_T;
1471 loop:
1472 S1 x_t = *p;
1473 S2 x_T = (TYPE) x_t;
1474 S3 res0_T = op (x_T, C0);
1475 S4 res1_T = op (res0_T, C1);
1476 S5 ... = () res1_T; - type demotion
1478 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1479 constants.
1480 Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
1481 be 'type' or some intermediate type. For now, we expect S5 to be a type
1482 demotion operation. We also check that S3 and S4 have only one use. */
1484 static gimple *
1485 vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1486 tree *type_in, tree *type_out)
1488 gimple *stmt = stmts->pop ();
1489 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1490 *use_stmt = NULL;
1491 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1492 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1493 bool first;
1494 tree type = NULL;
1496 first = true;
1497 while (1)
1499 if (!vinfo_for_stmt (stmt)
1500 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1501 return NULL;
1503 new_def_stmt = NULL;
1504 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1505 &op0, &op1, &new_def_stmt,
1506 stmts))
1508 if (first)
1509 return NULL;
1510 else
1511 break;
1514 /* STMT can be performed on a smaller type. Check its uses. */
1515 use_stmt = vect_single_imm_use (stmt);
1516 if (!use_stmt || !is_gimple_assign (use_stmt))
1517 return NULL;
1519 /* Create pattern statement for STMT. */
1520 vectype = get_vectype_for_scalar_type (new_type);
1521 if (!vectype)
1522 return NULL;
1524 /* We want to collect all the statements for which we create pattern
1525 statements, except for the case when the last statement in the
1526 sequence doesn't have a corresponding pattern statement. In such
1527 case we associate the last pattern statement with the last statement
1528 in the sequence. Therefore, we only add the original statement to
1529 the list if we know that it is not the last. */
1530 if (prev_stmt)
1531 stmts->safe_push (prev_stmt);
1533 var = vect_recog_temp_ssa_var (new_type, NULL);
1534 pattern_stmt
1535 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1536 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1537 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1539 if (dump_enabled_p ())
1541 dump_printf_loc (MSG_NOTE, vect_location,
1542 "created pattern stmt: ");
1543 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1546 type = gimple_expr_type (stmt);
1547 prev_stmt = stmt;
1548 stmt = use_stmt;
1550 first = false;
1553 /* We got a sequence. We expect it to end with a type demotion operation.
1554 Otherwise, we quit (for now). There are three possible cases: the
1555 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1556 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1557 NEW_TYPE differs (we create a new conversion statement). */
1558 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1560 use_lhs = gimple_assign_lhs (use_stmt);
1561 use_type = TREE_TYPE (use_lhs);
1562 /* Support only type demotion or signedness change. */
1563 if (!INTEGRAL_TYPE_P (use_type)
1564 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1565 return NULL;
1567 /* Check that NEW_TYPE is not bigger than the conversion result. */
1568 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1569 return NULL;
1571 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1572 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1574 /* Create NEW_TYPE->USE_TYPE conversion. */
1575 new_oprnd = make_ssa_name (use_type);
1576 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1577 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1579 *type_in = get_vectype_for_scalar_type (new_type);
1580 *type_out = get_vectype_for_scalar_type (use_type);
1582 /* We created a pattern statement for the last statement in the
1583 sequence, so we don't need to associate it with the pattern
1584 statement created for PREV_STMT. Therefore, we add PREV_STMT
1585 to the list in order to mark it later in vect_pattern_recog_1. */
1586 if (prev_stmt)
1587 stmts->safe_push (prev_stmt);
1589 else
1591 if (prev_stmt)
1592 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1593 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1595 *type_in = vectype;
1596 *type_out = NULL_TREE;
1599 stmts->safe_push (use_stmt);
1601 else
1602 /* TODO: support general case, create a conversion to the correct type. */
1603 return NULL;
1605 /* Pattern detected. */
1606 if (dump_enabled_p ())
1608 dump_printf_loc (MSG_NOTE, vect_location,
1609 "vect_recog_over_widening_pattern: detected: ");
1610 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1613 return pattern_stmt;
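/* An illustrative scalar source loop (not part of this file) with the
   S1-S5 shape described above, assuming unsigned char data over-promoted
   to int: the shift and mask can be done in a narrower type because the
   result is immediately demoted back to unsigned char.  */

static void
example_over_widening (unsigned char *restrict in,
		       unsigned char *restrict out, int n)
{
  for (int i = 0; i < n; i++)
    {
      int x_T = in[i];                 /* S2: over-promotion.  */
      int res_T = (x_T >> 1) & 0x7f;   /* S3-S4: ops with constants.  */
      out[i] = res_T;                  /* S5: type demotion.  */
    }
}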
1616 /* Detect widening shift pattern:
1618 type a_t;
1619 TYPE a_T, res_T;
1621 S1 a_t = ;
1622 S2 a_T = (TYPE) a_t;
1623 S3 res_T = a_T << CONST;
1625 where type 'TYPE' is at least double the size of type 'type'.
1627 Also detect cases where the shift result is immediately converted
1628 to another type 'result_type' that is no larger in size than 'TYPE'.
1629 In those cases we perform a widen-shift that directly results in
1630 'result_type', to avoid a possible over-widening situation:
1632 type a_t;
1633 TYPE a_T, res_T;
1634 result_type res_result;
1636 S1 a_t = ;
1637 S2 a_T = (TYPE) a_t;
1638 S3 res_T = a_T << CONST;
1639 S4 res_result = (result_type) res_T;
1640 '--> res_result' = a_t w<< CONST;
1642 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1643 create an additional pattern stmt for S2 to create a variable of an
1644 intermediate type, and perform widen-shift on the intermediate type:
1646 type a_t;
1647 interm_type a_it;
1648 TYPE a_T, res_T, res_T';
1650 S1 a_t = ;
1651 S2 a_T = (TYPE) a_t;
1652 '--> a_it = (interm_type) a_t;
1653 S3 res_T = a_T << CONST;
1654 '--> res_T' = a_it <<* CONST;
1656 Input/Output:
1658 * STMTS: Contains a stmt from which the pattern search begins.
1659 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1660 in STMTS. When an intermediate type is used and a pattern statement is
1661 created for S2, we also put S2 here (before S3).
1663 Output:
1665 * TYPE_IN: The type of the input arguments to the pattern.
1667 * TYPE_OUT: The type of the output of this pattern.
1669 * Return value: A new stmt that will be used to replace the sequence of
1670 stmts that constitute the pattern. In this case it will be:
1671 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1673 static gimple *
1674 vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1675 tree *type_in, tree *type_out)
1677 gimple *last_stmt = stmts->pop ();
1678 gimple *def_stmt0;
1679 tree oprnd0, oprnd1;
1680 tree type, half_type0;
1681 gimple *pattern_stmt;
1682 tree vectype, vectype_out = NULL_TREE;
1683 tree var;
1684 enum tree_code dummy_code;
1685 int dummy_int;
1686 vec<tree> dummy_vec;
1687 gimple *use_stmt;
1688 bool promotion;
1690 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1691 return NULL;
1693 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1694 return NULL;
1696 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1697 return NULL;
1699 oprnd0 = gimple_assign_rhs1 (last_stmt);
1700 oprnd1 = gimple_assign_rhs2 (last_stmt);
1701 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1702 return NULL;
1704 /* Check operand 0: it has to be defined by a type promotion. */
1705 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1706 &promotion)
1707 || !promotion)
1708 return NULL;
1710 /* Check operand 1: has to be positive. We check that it fits the type
1711 in vect_handle_widen_op_by_const (). */
1712 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1713 return NULL;
1715 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1716 type = gimple_expr_type (last_stmt);
1718 /* Check for subsequent conversion to another type. */
1719 use_stmt = vect_single_imm_use (last_stmt);
1720 if (use_stmt && is_gimple_assign (use_stmt)
1721 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1722 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1724 tree use_lhs = gimple_assign_lhs (use_stmt);
1725 tree use_type = TREE_TYPE (use_lhs);
1727 if (INTEGRAL_TYPE_P (use_type)
1728 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1730 last_stmt = use_stmt;
1731 type = use_type;
1735 /* Check if this is a widening operation. */
1736 gimple *wstmt = NULL;
1737 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1738 &oprnd0, &wstmt,
1739 type, &half_type0, def_stmt0))
1740 return NULL;
1742 /* Pattern detected. */
1743 if (dump_enabled_p ())
1744 dump_printf_loc (MSG_NOTE, vect_location,
1745 "vect_recog_widen_shift_pattern: detected:\n");
1747 /* Check target support. */
1748 vectype = get_vectype_for_scalar_type (half_type0);
1749 vectype_out = get_vectype_for_scalar_type (type);
1751 if (!vectype
1752 || !vectype_out
1753 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1754 vectype_out, vectype,
1755 &dummy_code, &dummy_code,
1756 &dummy_int, &dummy_vec))
1757 return NULL;
1759 *type_in = vectype;
1760 *type_out = vectype_out;
1762 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1763 var = vect_recog_temp_ssa_var (type, NULL);
1764 pattern_stmt
1765 = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1766 if (wstmt)
1768 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1769 new_pattern_def_seq (stmt_vinfo, wstmt);
1770 stmt_vec_info new_stmt_info
1771 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1772 set_vinfo_for_stmt (wstmt, new_stmt_info);
1773 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1776 if (dump_enabled_p ())
1777 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1779 stmts->safe_push (last_stmt);
1780 return pattern_stmt;
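/* An illustrative scalar source loop (not part of this file) of the shape
   described above, assuming a signed char input widened before the shift
   and the result stored as short (the 'result_type' variant): the whole
   sequence becomes a single widening shift of the narrow input.  */

static void
example_widen_shift (signed char *restrict a, short *restrict res, int n)
{
  for (int i = 0; i < n; i++)
    res[i] = a[i] << 4;         /* S2-S4: a_t w<< 4, result in short.  */
}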
1783 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1785 type a_t, b_t, c_t;
1787 S0 a_t = b_t r<< c_t;
1789 Input/Output:
1791 * STMTS: Contains a stmt from which the pattern search begins,
1792 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1793 with a sequence:
1795 S1 d_t = -c_t;
1796 S2 e_t = d_t & (B - 1);
1797 S3 f_t = b_t << c_t;
1798 S4 g_t = b_t >> e_t;
1799 S0 a_t = f_t | g_t;
1801 where B is element bitsize of type.
1803 Output:
1805 * TYPE_IN: The type of the input arguments to the pattern.
1807 * TYPE_OUT: The type of the output of this pattern.
1809 * Return value: A new stmt that will be used to replace the rotate
1810 S0 stmt. */
1812 static gimple *
1813 vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1815 gimple *last_stmt = stmts->pop ();
1816 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1817 gimple *pattern_stmt, *def_stmt;
1818 enum tree_code rhs_code;
1819 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1820 vec_info *vinfo = stmt_vinfo->vinfo;
1821 enum vect_def_type dt;
1822 optab optab1, optab2;
1823 edge ext_def = NULL;
1825 if (!is_gimple_assign (last_stmt))
1826 return NULL;
1828 rhs_code = gimple_assign_rhs_code (last_stmt);
1829 switch (rhs_code)
1831 case LROTATE_EXPR:
1832 case RROTATE_EXPR:
1833 break;
1834 default:
1835 return NULL;
1838 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1839 return NULL;
1841 lhs = gimple_assign_lhs (last_stmt);
1842 oprnd0 = gimple_assign_rhs1 (last_stmt);
1843 type = TREE_TYPE (oprnd0);
1844 oprnd1 = gimple_assign_rhs2 (last_stmt);
1845 if (TREE_CODE (oprnd0) != SSA_NAME
1846 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1847 || !INTEGRAL_TYPE_P (type)
1848 || !TYPE_UNSIGNED (type))
1849 return NULL;
1851 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1852 return NULL;
1854 if (dt != vect_internal_def
1855 && dt != vect_constant_def
1856 && dt != vect_external_def)
1857 return NULL;
1859 vectype = get_vectype_for_scalar_type (type);
1860 if (vectype == NULL_TREE)
1861 return NULL;
1863 /* If vector/vector or vector/scalar rotate is supported by the target,
1864 don't do anything here. */
1865 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1866 if (optab1
1867 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1868 return NULL;
1870 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1872 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1873 if (optab2
1874 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1875 return NULL;
1878 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1879 don't do anything here either. */
1880 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1881 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1882 if (!optab1
1883 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1884 || !optab2
1885 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1887 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1888 return NULL;
1889 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1890 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1891 if (!optab1
1892 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1893 || !optab2
1894 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1895 return NULL;
1898 *type_in = vectype;
1899 *type_out = vectype;
1900 if (*type_in == NULL_TREE)
1901 return NULL;
1903 if (dt == vect_external_def
1904 && TREE_CODE (oprnd1) == SSA_NAME
1905 && is_a <loop_vec_info> (vinfo))
1907 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1908 ext_def = loop_preheader_edge (loop);
1909 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1911 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1912 if (bb == NULL
1913 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1914 ext_def = NULL;
1918 def = NULL_TREE;
1919 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1920 if (TREE_CODE (oprnd1) == INTEGER_CST
1921 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1922 def = oprnd1;
1923 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1925 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1926 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1927 && TYPE_PRECISION (TREE_TYPE (rhs1))
1928 == TYPE_PRECISION (type))
1929 def = rhs1;
1932 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1933 if (def == NULL_TREE)
1935 def = vect_recog_temp_ssa_var (type, NULL);
1936 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1937 if (ext_def)
1939 basic_block new_bb
1940 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1941 gcc_assert (!new_bb);
1943 else
1944 append_pattern_def_seq (stmt_vinfo, def_stmt);
1946 stype = TREE_TYPE (def);
1947 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1949 if (TREE_CODE (def) == INTEGER_CST)
1951 if (!tree_fits_uhwi_p (def)
1952 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
1953 || integer_zerop (def))
1954 return NULL;
1955 def2 = build_int_cst (stype,
1956 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
1958 else
1960 tree vecstype = get_vectype_for_scalar_type (stype);
1961 stmt_vec_info def_stmt_vinfo;
1963 if (vecstype == NULL_TREE)
1964 return NULL;
1965 def2 = vect_recog_temp_ssa_var (stype, NULL);
1966 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
1967 if (ext_def)
1969 basic_block new_bb
1970 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1971 gcc_assert (!new_bb);
1973 else
1975 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1976 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1977 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1978 append_pattern_def_seq (stmt_vinfo, def_stmt);
1981 def2 = vect_recog_temp_ssa_var (stype, NULL);
1982 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
1983 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1984 gimple_assign_lhs (def_stmt), mask);
1985 if (ext_def)
1987 basic_block new_bb
1988 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1989 gcc_assert (!new_bb);
1991 else
1993 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
1994 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1995 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1996 append_pattern_def_seq (stmt_vinfo, def_stmt);
2000 var1 = vect_recog_temp_ssa_var (type, NULL);
2001 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2002 ? LSHIFT_EXPR : RSHIFT_EXPR,
2003 oprnd0, def);
2004 append_pattern_def_seq (stmt_vinfo, def_stmt);
2006 var2 = vect_recog_temp_ssa_var (type, NULL);
2007 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2008 ? RSHIFT_EXPR : LSHIFT_EXPR,
2009 oprnd0, def2);
2010 append_pattern_def_seq (stmt_vinfo, def_stmt);
2012 /* Pattern detected. */
2013 if (dump_enabled_p ())
2014 dump_printf_loc (MSG_NOTE, vect_location,
2015 "vect_recog_rotate_pattern: detected:\n");
2017 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2018 var = vect_recog_temp_ssa_var (type, NULL);
2019 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2021 if (dump_enabled_p ())
2022 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2024 stmts->safe_push (last_stmt);
2025 return pattern_stmt;
2028 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2029 vectorized:
2031 type a_t;
2032 TYPE b_T, res_T;
2034 S1 a_t = ;
2035 S2 b_T = ;
2036 S3 res_T = b_T op a_t;
2038 where type 'TYPE' is a type with different size than 'type',
2039 and op is <<, >> or rotate.
2041 Also detect cases:
2043 type a_t;
2044 TYPE b_T, c_T, res_T;
2046 S0 c_T = ;
2047 S1 a_t = (type) c_T;
2048 S2 b_T = ;
2049 S3 res_T = b_T op a_t;
2051 Input/Output:
2053 * STMTS: Contains a stmt from which the pattern search begins,
2054 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2055 with a shift/rotate which has same type on both operands, in the
2056 second case just b_T op c_T, in the first case with added cast
2057 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2059 Output:
2061 * TYPE_IN: The type of the input arguments to the pattern.
2063 * TYPE_OUT: The type of the output of this pattern.
2065 * Return value: A new stmt that will be used to replace the shift/rotate
2066 S3 stmt. */
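/* Illustrative example (hypothetical types): with 'type' == int and
   'TYPE' == long long,

     int a_t;
     long long b_T, res_T;
     res_T = b_T << a_t;

   gets a cast of a_t to long long added to the pattern def sequence,
   and S3 is replaced by a shift whose two operands share the wider
   type.  */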
2068 static gimple *
2069 vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2070 tree *type_in, tree *type_out)
2072 gimple *last_stmt = stmts->pop ();
2073 tree oprnd0, oprnd1, lhs, var;
2074 gimple *pattern_stmt;
2075 enum tree_code rhs_code;
2076 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2077 vec_info *vinfo = stmt_vinfo->vinfo;
2079 if (!is_gimple_assign (last_stmt))
2080 return NULL;
2082 rhs_code = gimple_assign_rhs_code (last_stmt);
2083 switch (rhs_code)
2085 case LSHIFT_EXPR:
2086 case RSHIFT_EXPR:
2087 case LROTATE_EXPR:
2088 case RROTATE_EXPR:
2089 break;
2090 default:
2091 return NULL;
2094 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2095 return NULL;
2097 lhs = gimple_assign_lhs (last_stmt);
2098 oprnd0 = gimple_assign_rhs1 (last_stmt);
2099 oprnd1 = gimple_assign_rhs2 (last_stmt);
2100 if (TREE_CODE (oprnd0) != SSA_NAME
2101 || TREE_CODE (oprnd1) != SSA_NAME
2102 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2103 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2104 || TYPE_PRECISION (TREE_TYPE (lhs))
2105 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2106 return NULL;
2108 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2109 if (!def_vinfo)
2110 return NULL;
2112 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2113 *type_out = *type_in;
2114 if (*type_in == NULL_TREE)
2115 return NULL;
2117 tree def = NULL_TREE;
2118 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2119 if (!STMT_VINFO_IN_PATTERN_P (def_vinfo)
2120 && def_stmt
2121 && gimple_assign_cast_p (def_stmt))
2123 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2124 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2125 && TYPE_PRECISION (TREE_TYPE (rhs1))
2126 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2128 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2129 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2130 def = rhs1;
2131 else
2133 tree mask
2134 = build_low_bits_mask (TREE_TYPE (rhs1),
2135 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2136 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2137 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2138 stmt_vec_info new_stmt_info
2139 = new_stmt_vec_info (def_stmt, vinfo);
2140 set_vinfo_for_stmt (def_stmt, new_stmt_info);
2141 STMT_VINFO_VECTYPE (new_stmt_info)
2142 = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2143 new_pattern_def_seq (stmt_vinfo, def_stmt);
2148 if (def == NULL_TREE)
2150 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2151 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2152 new_pattern_def_seq (stmt_vinfo, def_stmt);
2155 /* Pattern detected. */
2156 if (dump_enabled_p ())
2157 dump_printf_loc (MSG_NOTE, vect_location,
2158 "vect_recog_vector_vector_shift_pattern: detected:\n");
2160 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2161 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2162 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2164 if (dump_enabled_p ())
2165 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2167 stmts->safe_push (last_stmt);
2168 return pattern_stmt;
2171 /* Return true iff the target has a vector optab implementing the operation
2172 CODE on type VECTYPE. */
2174 static bool
2175 target_has_vecop_for_code (tree_code code, tree vectype)
2177 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2178 return voptab
2179 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2182 /* Verify that the target has optabs of VECTYPE to perform all the steps
2183 needed by the multiplication-by-immediate synthesis algorithm described by
2184 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2185 present. Return true iff the target supports all the steps. */
2187 static bool
2188 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2189 tree vectype, bool synth_shift_p)
2191 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2192 return false;
2194 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2195 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2197 if (var == negate_variant
2198 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2199 return false;
2201 /* If we must synthesize shifts with additions make sure that vector
2202 addition is available. */
2203 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2204 return false;
2206 for (int i = 1; i < alg->ops; i++)
2208 switch (alg->op[i])
2210 case alg_shift:
2211 break;
2212 case alg_add_t_m2:
2213 case alg_add_t2_m:
2214 case alg_add_factor:
2215 if (!supports_vplus)
2216 return false;
2217 break;
2218 case alg_sub_t_m2:
2219 case alg_sub_t2_m:
2220 case alg_sub_factor:
2221 if (!supports_vminus)
2222 return false;
2223 break;
2224 case alg_unknown:
2225 case alg_m:
2226 case alg_zero:
2227 case alg_impossible:
2228 return false;
2229 default:
2230 gcc_unreachable ();
2234 return true;
2237 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2238 putting the final result in DEST. Append all statements but the last into
2239 VINFO. Return the last statement. */
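/* Illustrative example (not part of the original comment): a shift by
   AMNT == 3 becomes three doublings

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;

   where t1 and t2 are appended to VINFO's pattern def sequence and the
   final addition is the statement returned.  */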
2241 static gimple *
2242 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2243 stmt_vec_info vinfo)
2245 HOST_WIDE_INT i;
2246 tree itype = TREE_TYPE (op);
2247 tree prev_res = op;
2248 gcc_assert (amnt >= 0);
2249 for (i = 0; i < amnt; i++)
2251 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2252 : dest;
2253 gimple *stmt
2254 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2255 prev_res = tmp_var;
2256 if (i < amnt - 1)
2257 append_pattern_def_seq (vinfo, stmt);
2258 else
2259 return stmt;
2261 gcc_unreachable ();
2262 return NULL;
2265 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2266 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2267 the process if necessary. Append the resulting assignment statements
2268 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2269 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2270 left shifts using additions. */
2272 static tree
2273 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2274 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2276 if (integer_zerop (op2)
2277 && (code == LSHIFT_EXPR
2278 || code == PLUS_EXPR))
2280 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2281 return op1;
2284 gimple *stmt;
2285 tree itype = TREE_TYPE (op1);
2286 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2288 if (code == LSHIFT_EXPR
2289 && synth_shift_p)
2291 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2292 stmt_vinfo);
2293 append_pattern_def_seq (stmt_vinfo, stmt);
2294 return tmp_var;
2297 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2298 append_pattern_def_seq (stmt_vinfo, stmt);
2299 return tmp_var;
2302 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2303 and simple arithmetic operations to be vectorized. Record the statements
2304 produced in STMT_VINFO and return the last statement in the sequence or
2305 NULL if it's not possible to synthesize such a multiplication.
2306 This function mirrors the behavior of expand_mult_const in expmed.c but
2307 works on tree-ssa form. */
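/* Illustrative example (one possible synthesis; the actual choice is
   made by choose_mult_variant): a multiplication by 10 may be emitted
   as

     t1 = op << 2;
     t2 = t1 + op;
     res = t2 << 1;

   i.e. op * 10 == ((op * 4) + op) * 2.  */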
2309 static gimple *
2310 vect_synth_mult_by_constant (tree op, tree val,
2311 stmt_vec_info stmt_vinfo)
2313 tree itype = TREE_TYPE (op);
2314 machine_mode mode = TYPE_MODE (itype);
2315 struct algorithm alg;
2316 mult_variant variant;
2317 if (!tree_fits_shwi_p (val))
2318 return NULL;
2320 /* Multiplication synthesis by shifts, adds and subs can introduce
2321 signed overflow where the original operation didn't. Perform the
2322 operations on an unsigned type and cast back to avoid this.
2323 In the future we may want to relax this for synthesis algorithms
2324 that we can prove do not cause unexpected overflow. */
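/* For example (illustrative), synthesizing x * 7 as (x << 3) - x can
   overflow in the intermediate shift even when x * 7 itself fits in
   the signed type (e.g. for x just below INT_MAX / 7), hence the
   detour through an unsigned type below.  */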
2325 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2327 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2329 /* Targets that don't support vector shifts but support vector additions
2330 can synthesize shifts that way. */
2331 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2333 HOST_WIDE_INT hwval = tree_to_shwi (val);
2334 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2335 The vectorizer's benefit analysis will decide whether it's beneficial
2336 to do this. */
2337 bool possible = choose_mult_variant (mode, hwval, &alg,
2338 &variant, MAX_COST);
2339 if (!possible)
2340 return NULL;
2342 tree vectype = get_vectype_for_scalar_type (multtype);
2344 if (!vectype
2345 || !target_supports_mult_synth_alg (&alg, variant,
2346 vectype, synth_shift_p))
2347 return NULL;
2349 tree accumulator;
2351 /* Clear out the sequence of statements so we can populate it below. */
2352 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2353 gimple *stmt = NULL;
2355 if (cast_to_unsigned_p)
2357 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2358 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2359 append_pattern_def_seq (stmt_vinfo, stmt);
2360 op = tmp_op;
2363 if (alg.op[0] == alg_zero)
2364 accumulator = build_int_cst (multtype, 0);
2365 else
2366 accumulator = op;
2368 bool needs_fixup = (variant == negate_variant)
2369 || (variant == add_variant);
2371 for (int i = 1; i < alg.ops; i++)
2373 tree shft_log = build_int_cst (multtype, alg.log[i]);
2374 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2375 tree tmp_var = NULL_TREE;
2377 switch (alg.op[i])
2379 case alg_shift:
2380 if (synth_shift_p)
2381 stmt
2382 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2383 stmt_vinfo);
2384 else
2385 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2386 shft_log);
2387 break;
2388 case alg_add_t_m2:
2389 tmp_var
2390 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2391 stmt_vinfo, synth_shift_p);
2392 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2393 tmp_var);
2394 break;
2395 case alg_sub_t_m2:
2396 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2397 shft_log, stmt_vinfo,
2398 synth_shift_p);
2399 /* In some algorithms the first step involves zeroing the
2400 accumulator. If subtracting from such an accumulator
2401 just emit the negation directly. */
2402 if (integer_zerop (accumulator))
2403 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2404 else
2405 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2406 tmp_var);
2407 break;
2408 case alg_add_t2_m:
2409 tmp_var
2410 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2411 stmt_vinfo, synth_shift_p);
2412 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2413 break;
2414 case alg_sub_t2_m:
2415 tmp_var
2416 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2417 stmt_vinfo, synth_shift_p);
2418 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2419 break;
2420 case alg_add_factor:
2421 tmp_var
2422 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2423 stmt_vinfo, synth_shift_p);
2424 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2425 tmp_var);
2426 break;
2427 case alg_sub_factor:
2428 tmp_var
2429 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2430 stmt_vinfo, synth_shift_p);
2431 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2432 accumulator);
2433 break;
2434 default:
2435 gcc_unreachable ();
2437 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2438 but rather return it directly. */
2440 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2441 append_pattern_def_seq (stmt_vinfo, stmt);
2442 accumulator = accum_tmp;
2444 if (variant == negate_variant)
2446 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2447 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2448 accumulator = accum_tmp;
2449 if (cast_to_unsigned_p)
2450 append_pattern_def_seq (stmt_vinfo, stmt);
2452 else if (variant == add_variant)
2454 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2455 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2456 accumulator = accum_tmp;
2457 if (cast_to_unsigned_p)
2458 append_pattern_def_seq (stmt_vinfo, stmt);
2460 /* Move back to a signed type if needed. */
2461 if (cast_to_unsigned_p)
2463 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2464 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2467 return stmt;
2470 /* Detect multiplication by a constant and convert it into a sequence of
2471 shifts, additions, subtractions and negations. We reuse the
2472 choose_mult_variant algorithms from expmed.c.
2474 Input/Output:
2476 STMTS: Contains a stmt from which the pattern search begins,
2477 i.e. the mult stmt.
2479 Output:
2481 * TYPE_IN: The type of the input arguments to the pattern.
2483 * TYPE_OUT: The type of the output of this pattern.
2485 * Return value: A new stmt that will be used to replace
2486 the multiplication. */
2488 static gimple *
2489 vect_recog_mult_pattern (vec<gimple *> *stmts,
2490 tree *type_in, tree *type_out)
2492 gimple *last_stmt = stmts->pop ();
2493 tree oprnd0, oprnd1, vectype, itype;
2494 gimple *pattern_stmt;
2495 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2497 if (!is_gimple_assign (last_stmt))
2498 return NULL;
2500 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2501 return NULL;
2503 oprnd0 = gimple_assign_rhs1 (last_stmt);
2504 oprnd1 = gimple_assign_rhs2 (last_stmt);
2505 itype = TREE_TYPE (oprnd0);
2507 if (TREE_CODE (oprnd0) != SSA_NAME
2508 || TREE_CODE (oprnd1) != INTEGER_CST
2509 || !INTEGRAL_TYPE_P (itype)
2510 || !type_has_mode_precision_p (itype))
2511 return NULL;
2513 vectype = get_vectype_for_scalar_type (itype);
2514 if (vectype == NULL_TREE)
2515 return NULL;
2517 /* If the target can handle vectorized multiplication natively,
2518 don't attempt to optimize this. */
2519 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2520 if (mul_optab != unknown_optab)
2522 machine_mode vec_mode = TYPE_MODE (vectype);
2523 int icode = (int) optab_handler (mul_optab, vec_mode);
2524 if (icode != CODE_FOR_nothing)
2525 return NULL;
2528 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2529 if (!pattern_stmt)
2530 return NULL;
2532 /* Pattern detected. */
2533 if (dump_enabled_p ())
2534 dump_printf_loc (MSG_NOTE, vect_location,
2535 "vect_recog_mult_pattern: detected:\n");
2537 if (dump_enabled_p ())
2538 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2539 pattern_stmt, 0);
2541 stmts->safe_push (last_stmt);
2542 *type_in = vectype;
2543 *type_out = vectype;
2545 return pattern_stmt;
2548 /* Detect a signed division by a constant that wouldn't be
2549 otherwise vectorized:
2551 type a_t, b_t;
2553 S1 a_t = b_t / N;
2555 where type 'type' is an integral type and N is a constant.
2557 Similarly handle modulo by a constant:
2559 S4 a_t = b_t % N;
2561 Input/Output:
2563 * STMTS: Contains a stmt from which the pattern search begins,
2564 i.e. the division stmt. If N is a power of two constant and
2565 type is signed, S1 is replaced by:
2566 S3 y_t = b_t < 0 ? N - 1 : 0;
2567 S2 x_t = b_t + y_t;
2568 S1' a_t = x_t >> log2 (N);
2570 If N is a power of two constant and type is signed, S4 is
2571 replaced by (where *_T temporaries have unsigned type):
2572 S9 y_T = b_t < 0 ? -1U : 0U;
2573 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2574 S7 z_t = (type) z_T;
2575 S6 w_t = b_t + z_t;
2576 S5 x_t = w_t & (N - 1);
2577 S4' a_t = x_t - z_t;
2579 Output:
2581 * TYPE_IN: The type of the input arguments to the pattern.
2583 * TYPE_OUT: The type of the output of this pattern.
2585 * Return value: A new stmt that will be used to replace the division
2586 S1 or modulo S4 stmt. */
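/* Illustrative example (hypothetical values): for a signed division
   by N == 4 the S3/S2/S1' sequence above becomes

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   which biases the arithmetic shift so that it truncates towards zero
   as TRUNC_DIV_EXPR requires.  */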
2588 static gimple *
2589 vect_recog_divmod_pattern (vec<gimple *> *stmts,
2590 tree *type_in, tree *type_out)
2592 gimple *last_stmt = stmts->pop ();
2593 tree oprnd0, oprnd1, vectype, itype, cond;
2594 gimple *pattern_stmt, *def_stmt;
2595 enum tree_code rhs_code;
2596 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2597 vec_info *vinfo = stmt_vinfo->vinfo;
2598 optab optab;
2599 tree q;
2600 int dummy_int, prec;
2601 stmt_vec_info def_stmt_vinfo;
2603 if (!is_gimple_assign (last_stmt))
2604 return NULL;
2606 rhs_code = gimple_assign_rhs_code (last_stmt);
2607 switch (rhs_code)
2609 case TRUNC_DIV_EXPR:
2610 case TRUNC_MOD_EXPR:
2611 break;
2612 default:
2613 return NULL;
2616 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2617 return NULL;
2619 oprnd0 = gimple_assign_rhs1 (last_stmt);
2620 oprnd1 = gimple_assign_rhs2 (last_stmt);
2621 itype = TREE_TYPE (oprnd0);
2622 if (TREE_CODE (oprnd0) != SSA_NAME
2623 || TREE_CODE (oprnd1) != INTEGER_CST
2624 || TREE_CODE (itype) != INTEGER_TYPE
2625 || !type_has_mode_precision_p (itype))
2626 return NULL;
2628 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2629 vectype = get_vectype_for_scalar_type (itype);
2630 if (vectype == NULL_TREE)
2631 return NULL;
2633 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2635 /* If the target can handle vectorized division or modulo natively,
2636 don't attempt to optimize this, since native division is likely
2637 to give smaller code. */
2638 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2639 if (optab != unknown_optab)
2641 machine_mode vec_mode = TYPE_MODE (vectype);
2642 int icode = (int) optab_handler (optab, vec_mode);
2643 if (icode != CODE_FOR_nothing)
2644 return NULL;
2648 prec = TYPE_PRECISION (itype);
2649 if (integer_pow2p (oprnd1))
2651 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2652 return NULL;
2654 /* Pattern detected. */
2655 if (dump_enabled_p ())
2656 dump_printf_loc (MSG_NOTE, vect_location,
2657 "vect_recog_divmod_pattern: detected:\n");
2659 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2660 build_int_cst (itype, 0));
2661 if (rhs_code == TRUNC_DIV_EXPR)
2663 tree var = vect_recog_temp_ssa_var (itype, NULL);
2664 tree shift;
2665 def_stmt
2666 = gimple_build_assign (var, COND_EXPR, cond,
2667 fold_build2 (MINUS_EXPR, itype, oprnd1,
2668 build_int_cst (itype, 1)),
2669 build_int_cst (itype, 0));
2670 new_pattern_def_seq (stmt_vinfo, def_stmt);
2671 var = vect_recog_temp_ssa_var (itype, NULL);
2672 def_stmt
2673 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2674 gimple_assign_lhs (def_stmt));
2675 append_pattern_def_seq (stmt_vinfo, def_stmt);
2677 shift = build_int_cst (itype, tree_log2 (oprnd1));
2678 pattern_stmt
2679 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2680 RSHIFT_EXPR, var, shift);
2682 else
2684 tree signmask;
2685 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2686 if (compare_tree_int (oprnd1, 2) == 0)
2688 signmask = vect_recog_temp_ssa_var (itype, NULL);
2689 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2690 build_int_cst (itype, 1),
2691 build_int_cst (itype, 0));
2692 append_pattern_def_seq (stmt_vinfo, def_stmt);
2694 else
2696 tree utype
2697 = build_nonstandard_integer_type (prec, 1);
2698 tree vecutype = get_vectype_for_scalar_type (utype);
2699 tree shift
2700 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2701 - tree_log2 (oprnd1));
2702 tree var = vect_recog_temp_ssa_var (utype, NULL);
2704 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2705 build_int_cst (utype, -1),
2706 build_int_cst (utype, 0));
2707 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2708 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2709 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2710 append_pattern_def_seq (stmt_vinfo, def_stmt);
2711 var = vect_recog_temp_ssa_var (utype, NULL);
2712 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2713 gimple_assign_lhs (def_stmt),
2714 shift);
2715 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2716 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2717 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2718 append_pattern_def_seq (stmt_vinfo, def_stmt);
2719 signmask = vect_recog_temp_ssa_var (itype, NULL);
2720 def_stmt
2721 = gimple_build_assign (signmask, NOP_EXPR, var);
2722 append_pattern_def_seq (stmt_vinfo, def_stmt);
2724 def_stmt
2725 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2726 PLUS_EXPR, oprnd0, signmask);
2727 append_pattern_def_seq (stmt_vinfo, def_stmt);
2728 def_stmt
2729 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2730 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2731 fold_build2 (MINUS_EXPR, itype, oprnd1,
2732 build_int_cst (itype, 1)));
2733 append_pattern_def_seq (stmt_vinfo, def_stmt);
2735 pattern_stmt
2736 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2737 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2738 signmask);
2741 if (dump_enabled_p ())
2742 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2745 stmts->safe_push (last_stmt);
2747 *type_in = vectype;
2748 *type_out = vectype;
2749 return pattern_stmt;
2752 if (prec > HOST_BITS_PER_WIDE_INT
2753 || integer_zerop (oprnd1))
2754 return NULL;
2756 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2757 return NULL;
2759 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2761 if (TYPE_UNSIGNED (itype))
2763 unsigned HOST_WIDE_INT mh, ml;
2764 int pre_shift, post_shift;
2765 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2766 & GET_MODE_MASK (itype_mode));
2767 tree t1, t2, t3, t4;
2769 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2770 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2771 return NULL;
2773 /* Find a suitable multiplier and right shift count
2774 instead of multiplying with D. */
2775 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2777 /* If the suggested multiplier is more than SIZE bits, we can do better
2778 for even divisors, using an initial right shift. */
2779 if (mh != 0 && (d & 1) == 0)
2781 pre_shift = ctz_or_zero (d);
2782 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2783 &ml, &post_shift, &dummy_int);
2784 gcc_assert (!mh);
2786 else
2787 pre_shift = 0;
2789 if (mh != 0)
2791 if (post_shift - 1 >= prec)
2792 return NULL;
2794 /* t1 = oprnd0 h* ml;
2795 t2 = oprnd0 - t1;
2796 t3 = t2 >> 1;
2797 t4 = t1 + t3;
2798 q = t4 >> (post_shift - 1); */
2799 t1 = vect_recog_temp_ssa_var (itype, NULL);
2800 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2801 build_int_cst (itype, ml));
2802 append_pattern_def_seq (stmt_vinfo, def_stmt);
2804 t2 = vect_recog_temp_ssa_var (itype, NULL);
2805 def_stmt
2806 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2807 append_pattern_def_seq (stmt_vinfo, def_stmt);
2809 t3 = vect_recog_temp_ssa_var (itype, NULL);
2810 def_stmt
2811 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2812 append_pattern_def_seq (stmt_vinfo, def_stmt);
2814 t4 = vect_recog_temp_ssa_var (itype, NULL);
2815 def_stmt
2816 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2818 if (post_shift != 1)
2820 append_pattern_def_seq (stmt_vinfo, def_stmt);
2822 q = vect_recog_temp_ssa_var (itype, NULL);
2823 pattern_stmt
2824 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2825 build_int_cst (itype, post_shift - 1));
2827 else
2829 q = t4;
2830 pattern_stmt = def_stmt;
2833 else
2835 if (pre_shift >= prec || post_shift >= prec)
2836 return NULL;
2838 /* t1 = oprnd0 >> pre_shift;
2839 t2 = t1 h* ml;
2840 q = t2 >> post_shift; */
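/* Illustrative worked example (typical values, not computed here):
   for d == 3 and prec == 32, choose_multiplier yields
   ml == 0xAAAAAAAB and post_shift == 1, so with pre_shift == 0 the
   quotient is q = (oprnd0 h* 0xAAAAAAAB) >> 1.  */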
2841 if (pre_shift)
2843 t1 = vect_recog_temp_ssa_var (itype, NULL);
2844 def_stmt
2845 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2846 build_int_cst (NULL, pre_shift));
2847 append_pattern_def_seq (stmt_vinfo, def_stmt);
2849 else
2850 t1 = oprnd0;
2852 t2 = vect_recog_temp_ssa_var (itype, NULL);
2853 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2854 build_int_cst (itype, ml));
2856 if (post_shift)
2858 append_pattern_def_seq (stmt_vinfo, def_stmt);
2860 q = vect_recog_temp_ssa_var (itype, NULL);
2861 def_stmt
2862 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2863 build_int_cst (itype, post_shift));
2865 else
2866 q = t2;
2868 pattern_stmt = def_stmt;
2871 else
2873 unsigned HOST_WIDE_INT ml;
2874 int post_shift;
2875 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2876 unsigned HOST_WIDE_INT abs_d;
2877 bool add = false;
2878 tree t1, t2, t3, t4;
2880 /* Give up for -1. */
2881 if (d == -1)
2882 return NULL;
2884 /* Since d might be INT_MIN, we have to cast to
2885 unsigned HOST_WIDE_INT before negating to avoid
2886 undefined signed overflow. */
2887 abs_d = (d >= 0
2888 ? (unsigned HOST_WIDE_INT) d
2889 : - (unsigned HOST_WIDE_INT) d);
2891 /* n rem d = n rem -d */
2892 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2894 d = abs_d;
2895 oprnd1 = build_int_cst (itype, abs_d);
2897 else if (HOST_BITS_PER_WIDE_INT >= prec
2898 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2899 /* This case is not handled correctly below. */
2900 return NULL;
2902 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2903 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2905 add = true;
2906 ml |= HOST_WIDE_INT_M1U << (prec - 1);
2908 if (post_shift >= prec)
2909 return NULL;
2911 /* t1 = oprnd0 h* ml; */
2912 t1 = vect_recog_temp_ssa_var (itype, NULL);
2913 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2914 build_int_cst (itype, ml));
2916 if (add)
2918 /* t2 = t1 + oprnd0; */
2919 append_pattern_def_seq (stmt_vinfo, def_stmt);
2920 t2 = vect_recog_temp_ssa_var (itype, NULL);
2921 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2923 else
2924 t2 = t1;
2926 if (post_shift)
2928 /* t3 = t2 >> post_shift; */
2929 append_pattern_def_seq (stmt_vinfo, def_stmt);
2930 t3 = vect_recog_temp_ssa_var (itype, NULL);
2931 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2932 build_int_cst (itype, post_shift));
2934 else
2935 t3 = t2;
2937 wide_int oprnd0_min, oprnd0_max;
2938 int msb = 1;
2939 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2941 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2942 msb = 0;
2943 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2944 msb = -1;
2947 if (msb == 0 && d >= 0)
2949 /* q = t3; */
2950 q = t3;
2951 pattern_stmt = def_stmt;
2953 else
2955 /* t4 = oprnd0 >> (prec - 1);
2956 or if we know from VRP that oprnd0 >= 0
2957 t4 = 0;
2958 or if we know from VRP that oprnd0 < 0
2959 t4 = -1; */
2960 append_pattern_def_seq (stmt_vinfo, def_stmt);
2961 t4 = vect_recog_temp_ssa_var (itype, NULL);
2962 if (msb != 1)
2963 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2964 build_int_cst (itype, msb));
2965 else
2966 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2967 build_int_cst (itype, prec - 1));
2968 append_pattern_def_seq (stmt_vinfo, def_stmt);
2970 /* q = t3 - t4; or q = t4 - t3; */
2971 q = vect_recog_temp_ssa_var (itype, NULL);
2972 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2973 d < 0 ? t3 : t4);
2977 if (rhs_code == TRUNC_MOD_EXPR)
2979 tree r, t1;
2981 /* We divided. Now finish by:
2982 t1 = q * oprnd1;
2983 r = oprnd0 - t1; */
2984 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2986 t1 = vect_recog_temp_ssa_var (itype, NULL);
2987 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
2988 append_pattern_def_seq (stmt_vinfo, def_stmt);
2990 r = vect_recog_temp_ssa_var (itype, NULL);
2991 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
2994 /* Pattern detected. */
2995 if (dump_enabled_p ())
2997 dump_printf_loc (MSG_NOTE, vect_location,
2998 "vect_recog_divmod_pattern: detected: ");
2999 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3002 stmts->safe_push (last_stmt);
3004 *type_in = vectype;
3005 *type_out = vectype;
3006 return pattern_stmt;
3009 /* Function vect_recog_mixed_size_cond_pattern
3011 Try to find the following pattern:
3013 type x_t, y_t;
3014 TYPE a_T, b_T, c_T;
3015 loop:
3016 S1 a_T = x_t CMP y_t ? b_T : c_T;
3018 where type 'TYPE' is an integral type which has different size
3019 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3020 than 'type', the constants need to fit into an integer type
3021 with the same width as 'type') or results of conversion from 'type'.
3023 Input:
3025 * LAST_STMT: A stmt from which the pattern search begins.
3027 Output:
3029 * TYPE_IN: The type of the input arguments to the pattern.
3031 * TYPE_OUT: The type of the output of this pattern.
3033 * Return value: A new stmt that will be used to replace the pattern.
3034 Additionally a def_stmt is added.
3036 a_it = x_t CMP y_t ? b_it : c_it;
3037 a_T = (TYPE) a_it; */
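/* Illustrative example (hypothetical types): for

     int x_t, y_t;
     short a_T;
     a_T = x_t < y_t ? 1 : 2;

   the COND_EXPR is first computed in a 32-bit temporary matching the
   width of the comparison and then narrowed:

     a_it = x_t < y_t ? 1 : 2;
     a_T = (short) a_it;  */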
3039 static gimple *
3040 vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
3041 tree *type_out)
3043 gimple *last_stmt = (*stmts)[0];
3044 tree cond_expr, then_clause, else_clause;
3045 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3046 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3047 gimple *pattern_stmt, *def_stmt;
3048 vec_info *vinfo = stmt_vinfo->vinfo;
3049 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3050 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3051 bool promotion;
3052 tree comp_scalar_type;
3054 if (!is_gimple_assign (last_stmt)
3055 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3056 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3057 return NULL;
3059 cond_expr = gimple_assign_rhs1 (last_stmt);
3060 then_clause = gimple_assign_rhs2 (last_stmt);
3061 else_clause = gimple_assign_rhs3 (last_stmt);
3063 if (!COMPARISON_CLASS_P (cond_expr))
3064 return NULL;
3066 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3067 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3068 if (comp_vectype == NULL_TREE)
3069 return NULL;
3071 type = gimple_expr_type (last_stmt);
3072 if (types_compatible_p (type, comp_scalar_type)
3073 || ((TREE_CODE (then_clause) != INTEGER_CST
3074 || TREE_CODE (else_clause) != INTEGER_CST)
3075 && !INTEGRAL_TYPE_P (comp_scalar_type))
3076 || !INTEGRAL_TYPE_P (type))
3077 return NULL;
3079 if ((TREE_CODE (then_clause) != INTEGER_CST
3080 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3081 &def_stmt0, &promotion))
3082 || (TREE_CODE (else_clause) != INTEGER_CST
3083 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3084 &def_stmt1, &promotion)))
3085 return NULL;
3087 if (orig_type0 && orig_type1
3088 && !types_compatible_p (orig_type0, orig_type1))
3089 return NULL;
3091 if (orig_type0)
3093 if (!types_compatible_p (orig_type0, comp_scalar_type))
3094 return NULL;
3095 then_clause = gimple_assign_rhs1 (def_stmt0);
3096 itype = orig_type0;
3099 if (orig_type1)
3101 if (!types_compatible_p (orig_type1, comp_scalar_type))
3102 return NULL;
3103 else_clause = gimple_assign_rhs1 (def_stmt1);
3104 itype = orig_type1;
3108 HOST_WIDE_INT cmp_mode_size
3109 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3111 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3112 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3113 return NULL;
3115 vectype = get_vectype_for_scalar_type (type);
3116 if (vectype == NULL_TREE)
3117 return NULL;
3119 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3120 return NULL;
3122 if (itype == NULL_TREE)
3123 itype = build_nonstandard_integer_type (cmp_mode_size,
3124 TYPE_UNSIGNED (type));
3126 if (itype == NULL_TREE
3127 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3128 return NULL;
3130 vecitype = get_vectype_for_scalar_type (itype);
3131 if (vecitype == NULL_TREE)
3132 return NULL;
3134 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3135 return NULL;
3137 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3139 if ((TREE_CODE (then_clause) == INTEGER_CST
3140 && !int_fits_type_p (then_clause, itype))
3141 || (TREE_CODE (else_clause) == INTEGER_CST
3142 && !int_fits_type_p (else_clause, itype)))
3143 return NULL;
3146 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3147 COND_EXPR, unshare_expr (cond_expr),
3148 fold_convert (itype, then_clause),
3149 fold_convert (itype, else_clause));
3150 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3151 NOP_EXPR, gimple_assign_lhs (def_stmt));
3153 new_pattern_def_seq (stmt_vinfo, def_stmt);
3154 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3155 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3156 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3157 *type_in = vecitype;
3158 *type_out = vectype;
3160 if (dump_enabled_p ())
3161 dump_printf_loc (MSG_NOTE, vect_location,
3162 "vect_recog_mixed_size_cond_pattern: detected:\n");
3164 return pattern_stmt;
3168 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3169 true if bool VAR can and should be optimized that way. Assume it shouldn't
3170 in case it's a result of a comparison which can be directly vectorized into
3171 a vector comparison. Fills in STMTS with all stmts visited during the
3172 walk. */
3174 static bool
3175 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3177 tree rhs1;
3178 enum tree_code rhs_code;
3180 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3181 if (!def_stmt_info)
3182 return false;
3184 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3185 if (!def_stmt)
3186 return false;
3188 if (stmts.contains (def_stmt))
3189 return true;
3191 rhs1 = gimple_assign_rhs1 (def_stmt);
3192 rhs_code = gimple_assign_rhs_code (def_stmt);
3193 switch (rhs_code)
3195 case SSA_NAME:
3196 if (! check_bool_pattern (rhs1, vinfo, stmts))
3197 return false;
3198 break;
3200 CASE_CONVERT:
3201 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3202 return false;
3203 if (! check_bool_pattern (rhs1, vinfo, stmts))
3204 return false;
3205 break;
3207 case BIT_NOT_EXPR:
3208 if (! check_bool_pattern (rhs1, vinfo, stmts))
3209 return false;
3210 break;
3212 case BIT_AND_EXPR:
3213 case BIT_IOR_EXPR:
3214 case BIT_XOR_EXPR:
3215 if (! check_bool_pattern (rhs1, vinfo, stmts)
3216 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3217 return false;
3218 break;
3220 default:
3221 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3223 tree vecitype, comp_vectype;
3225 /* If the comparison can throw, then is_gimple_condexpr will be
3226 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3227 if (stmt_could_throw_p (def_stmt))
3228 return false;
3230 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3231 if (comp_vectype == NULL_TREE)
3232 return false;
3234 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3235 if (mask_type
3236 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3237 return false;
3239 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3241 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3242 tree itype
3243 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3244 vecitype = get_vectype_for_scalar_type (itype);
3245 if (vecitype == NULL_TREE)
3246 return false;
3248 else
3249 vecitype = comp_vectype;
3250 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3251 return false;
3253 else
3254 return false;
3255 break;
3258 bool res = stmts.add (def_stmt);
3259 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3260 gcc_assert (!res);
3262 return true;
3266 /* Helper function of adjust_bool_pattern. Add a cast to TYPE of the result
3267 of a previous stmt (SSA_NAME_DEF_STMT of VAR), appending the cast to
3268 STMT_INFO's pattern def sequence. */
3270 static tree
3271 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3273 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3274 NOP_EXPR, var);
3275 stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3276 set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3277 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3278 append_pattern_def_seq (stmt_info, cast_stmt);
3279 return gimple_assign_lhs (cast_stmt);
3282 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3283 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3284 type, OUT_TYPE is the desired final integer type of the whole pattern.
3285 STMT_INFO is the info of the pattern root and is where pattern stmts should
3286 be associated with. DEFS is a map of pattern defs. */
3288 static void
3289 adjust_bool_pattern (tree var, tree out_type,
3290 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3292 gimple *stmt = SSA_NAME_DEF_STMT (var);
3293 enum tree_code rhs_code, def_rhs_code;
3294 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3295 location_t loc;
3296 gimple *pattern_stmt, *def_stmt;
3297 tree trueval = NULL_TREE;
3299 rhs1 = gimple_assign_rhs1 (stmt);
3300 rhs2 = gimple_assign_rhs2 (stmt);
3301 rhs_code = gimple_assign_rhs_code (stmt);
3302 loc = gimple_location (stmt);
3303 switch (rhs_code)
3305 case SSA_NAME:
3306 CASE_CONVERT:
3307 irhs1 = *defs.get (rhs1);
3308 itype = TREE_TYPE (irhs1);
3309 pattern_stmt
3310 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3311 SSA_NAME, irhs1);
3312 break;
3314 case BIT_NOT_EXPR:
3315 irhs1 = *defs.get (rhs1);
3316 itype = TREE_TYPE (irhs1);
3317 pattern_stmt
3318 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3319 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3320 break;
3322 case BIT_AND_EXPR:
3323 /* Try to optimize x = y & (a < b ? 1 : 0); into
3324 x = (a < b ? y : 0);
3326 E.g. for:
3327 bool a_b, b_b, c_b;
3328 TYPE d_T;
3330 S1 a_b = x1 CMP1 y1;
3331 S2 b_b = x2 CMP2 y2;
3332 S3 c_b = a_b & b_b;
3333 S4 d_T = (TYPE) c_b;
3335 we would normally emit:
3337 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3338 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3339 S3' c_T = a_T & b_T;
3340 S4' d_T = c_T;
3342 but we can save one stmt by using the
3343 result of one of the COND_EXPRs in the other COND_EXPR and leave
3344 BIT_AND_EXPR stmt out:
3346 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3347 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3348 S4' f_T = c_T;
3350 At least when VEC_COND_EXPR is implemented using masks,
3351 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
3352 computes the comparison mask and ANDs it, in one case with an
3353 all-ones vector, in the other case with a vector register.
3354 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3355 often more expensive. */
3356 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3357 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3358 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3360 irhs1 = *defs.get (rhs1);
3361 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3362 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3363 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3365 rhs_code = def_rhs_code;
3366 rhs1 = def_rhs1;
3367 rhs2 = gimple_assign_rhs2 (def_stmt);
3368 trueval = irhs1;
3369 goto do_compare;
3371 else
3372 irhs2 = *defs.get (rhs2);
3373 goto and_ior_xor;
3375 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3376 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3377 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3379 irhs2 = *defs.get (rhs2);
3380 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3381 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3382 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3384 rhs_code = def_rhs_code;
3385 rhs1 = def_rhs1;
3386 rhs2 = gimple_assign_rhs2 (def_stmt);
3387 trueval = irhs2;
3388 goto do_compare;
3390 else
3391 irhs1 = *defs.get (rhs1);
3392 goto and_ior_xor;
3394 /* FALLTHRU */
3395 case BIT_IOR_EXPR:
3396 case BIT_XOR_EXPR:
3397 irhs1 = *defs.get (rhs1);
3398 irhs2 = *defs.get (rhs2);
3399 and_ior_xor:
3400 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3401 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3403 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3404 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3405 int out_prec = TYPE_PRECISION (out_type);
3406 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3407 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3408 stmt_info);
3409 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3410 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3411 stmt_info);
3412 else
3414 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3415 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3418 itype = TREE_TYPE (irhs1);
3419 pattern_stmt
3420 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3421 rhs_code, irhs1, irhs2);
3422 break;
3424 default:
3425 do_compare:
3426 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3427 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3428 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3429 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3430 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3432 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3433 itype
3434 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3436 else
3437 itype = TREE_TYPE (rhs1);
3438 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3439 if (trueval == NULL_TREE)
3440 trueval = build_int_cst (itype, 1);
3441 else
3442 gcc_checking_assert (useless_type_conversion_p (itype,
3443 TREE_TYPE (trueval)));
3444 pattern_stmt
3445 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3446 COND_EXPR, cond_expr, trueval,
3447 build_int_cst (itype, 0));
3448 break;
3451 gimple_set_location (pattern_stmt, loc);
3452 /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3453 pattern def seq stmts instead of just letting auto-detection do
3454 its work? */
3455 stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3456 set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3457 STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3458 append_pattern_def_seq (stmt_info, pattern_stmt);
3459 defs.put (var, gimple_assign_lhs (pattern_stmt));
3462 /* Comparison function to qsort a vector of gimple stmts by UID. */
3464 static int
3465 sort_after_uid (const void *p1, const void *p2)
3467 const gimple *stmt1 = *(const gimple * const *)p1;
3468 const gimple *stmt2 = *(const gimple * const *)p2;
3469 return gimple_uid (stmt1) - gimple_uid (stmt2);
3472 /* Create pattern stmts for all stmts participating in the bool pattern
3473 specified by BOOL_STMT_SET and its root STMT with the desired type
3474 OUT_TYPE. Return the def of the pattern root. */
3476 static tree
3477 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3478 tree out_type, gimple *stmt)
3480 /* Gather original stmts in the bool pattern in their order of appearance
3481 in the IL. */
3482 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3483 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3484 i != bool_stmt_set.end (); ++i)
3485 bool_stmts.quick_push (*i);
3486 bool_stmts.qsort (sort_after_uid);
3488 /* Now process them in that order, producing pattern stmts. */
3489 hash_map <tree, tree> defs;
3490 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3491 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3492 out_type, vinfo_for_stmt (stmt), defs);
3494 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3495 gimple *pattern_stmt
3496 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3497 return gimple_assign_lhs (pattern_stmt);
3500 /* Helper for search_type_for_mask. */
3502 static tree
3503 search_type_for_mask_1 (tree var, vec_info *vinfo,
3504 hash_map<gimple *, tree> &cache)
3506 tree rhs1;
3507 enum tree_code rhs_code;
3508 tree res = NULL_TREE, res2;
3510 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3511 return NULL_TREE;
3513 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3514 if (!def_stmt_info)
3515 return NULL_TREE;
3517 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3518 if (!def_stmt)
3519 return NULL_TREE;
3521 tree *c = cache.get (def_stmt);
3522 if (c)
3523 return *c;
3525 rhs_code = gimple_assign_rhs_code (def_stmt);
3526 rhs1 = gimple_assign_rhs1 (def_stmt);
3528 switch (rhs_code)
3530 case SSA_NAME:
3531 case BIT_NOT_EXPR:
3532 CASE_CONVERT:
3533 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3534 break;
3536 case BIT_AND_EXPR:
3537 case BIT_IOR_EXPR:
3538 case BIT_XOR_EXPR:
3539 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3540 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3541 cache);
3542 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3543 res = res2;
3544 break;
3546 default:
3547 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3549 tree comp_vectype, mask_type;
3551 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3553 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3554 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3555 vinfo, cache);
3556 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3557 res = res2;
3558 break;
3561 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3562 if (comp_vectype == NULL_TREE)
3564 res = NULL_TREE;
3565 break;
3568 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3569 if (!mask_type
3570 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3572 res = NULL_TREE;
3573 break;
3576 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3577 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3579 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3580 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3582 else
3583 res = TREE_TYPE (rhs1);
3587 cache.put (def_stmt, res);
3588 return res;
3591 /* Return the proper type for converting bool VAR into
3592 an integer value or NULL_TREE if no such type exists.
3593 The type is chosen so that the converted value has the
3594 same number of elements as VAR's vector type. */
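/* For instance (illustrative), a mask that ultimately stems from a
   comparison of two 32-bit integers is best converted through a
   32-bit unsigned integer type, keeping the same number of vector
   lanes as the comparison.  */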
3596 static tree
3597 search_type_for_mask (tree var, vec_info *vinfo)
3599 hash_map<gimple *, tree> cache;
3600 return search_type_for_mask_1 (var, vinfo, cache);
3603 /* Function vect_recog_bool_pattern
3605 Try to find pattern like following:
3607 bool a_b, b_b, c_b, d_b, e_b;
3608 TYPE f_T;
3609 loop:
3610 S1 a_b = x1 CMP1 y1;
3611 S2 b_b = x2 CMP2 y2;
3612 S3 c_b = a_b & b_b;
3613 S4 d_b = x3 CMP3 y3;
3614 S5 e_b = c_b | d_b;
3615 S6 f_T = (TYPE) e_b;
3617 where type 'TYPE' is an integral type. Or a similar pattern
3618 ending in
3620 S6 f_Y = e_b ? r_Y : s_Y;
3622 as results from if-conversion of a complex condition.
3624 Input:
3626 * LAST_STMT: A stmt at the end from which the pattern
3627 search begins, i.e. cast of a bool to
3628 an integer type.
3630 Output:
3632 * TYPE_IN: The type of the input arguments to the pattern.
3634 * TYPE_OUT: The type of the output of this pattern.
3636 * Return value: A new stmt that will be used to replace the pattern.
3638 Assuming the size of TYPE is the same as the size of all comparisons
3639 (otherwise some casts would be added where needed), for the above
3640 sequence we create the related pattern stmts:
3641 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3642 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3643 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3644 S5' e_T = c_T | d_T;
3645 S6' f_T = e_T;
3647 Instead of the above S3' we could emit:
3648 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3649 S3' c_T = a_T | b_T;
3650 but the above is more efficient. */
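/* Illustrative example (hypothetical source): the pattern typically
   arises from code such as

     f[i] = (a[i] < b[i]) & (c[i] < d[i]);

   where the boolean operations are if-converted into S1-S6 above and
   are then re-expressed as integer COND_EXPRs so they can be carried
   in vector registers of the same width as 'TYPE'.  */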
3652 static gimple *
3653 vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3654 tree *type_out)
3656 gimple *last_stmt = stmts->pop ();
3657 enum tree_code rhs_code;
3658 tree var, lhs, rhs, vectype;
3659 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3660 stmt_vec_info new_stmt_info;
3661 vec_info *vinfo = stmt_vinfo->vinfo;
3662 gimple *pattern_stmt;
3664 if (!is_gimple_assign (last_stmt))
3665 return NULL;
3667 var = gimple_assign_rhs1 (last_stmt);
3668 lhs = gimple_assign_lhs (last_stmt);
3670 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3671 return NULL;
3673 hash_set<gimple *> bool_stmts;
3675 rhs_code = gimple_assign_rhs_code (last_stmt);
3676 if (CONVERT_EXPR_CODE_P (rhs_code))
3678 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3679 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3680 return NULL;
3681 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3682 if (vectype == NULL_TREE)
3683 return NULL;
3685 if (check_bool_pattern (var, vinfo, bool_stmts))
3687 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3688 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3689 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3690 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3691 else
3692 pattern_stmt
3693 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3695 else
3697 tree type = search_type_for_mask (var, vinfo);
3698 tree cst0, cst1, tmp;
3700 if (!type)
3701 return NULL;
3703 /* We may directly use the cond with a narrowed type to avoid
3704 multiple cond exprs with subsequent result packing, and
3705 perform a single cond with a packed mask instead. In the
3706 case of widening it is better to do the cond first and then
3707 extract the results. */
3708 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3709 type = TREE_TYPE (lhs);
3711 cst0 = build_int_cst (type, 0);
3712 cst1 = build_int_cst (type, 1);
3713 tmp = vect_recog_temp_ssa_var (type, NULL);
3714 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3716 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3718 tree new_vectype = get_vectype_for_scalar_type (type);
3719 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3720 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3721 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3722 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3724 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3725 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3729 *type_out = vectype;
3730 *type_in = vectype;
3731 stmts->safe_push (last_stmt);
3732 if (dump_enabled_p ())
3733 dump_printf_loc (MSG_NOTE, vect_location,
3734 "vect_recog_bool_pattern: detected:\n");
3736 return pattern_stmt;
3738 else if (rhs_code == COND_EXPR
3739 && TREE_CODE (var) == SSA_NAME)
3741 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3742 if (vectype == NULL_TREE)
3743 return NULL;
3745 /* Build a scalar type for the boolean result that when
3746 vectorized matches the vector type of the result in
3747 size and number of elements. */
3748 unsigned prec
3749 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3750 TYPE_VECTOR_SUBPARTS (vectype));
3752 tree type
3753 = build_nonstandard_integer_type (prec,
3754 TYPE_UNSIGNED (TREE_TYPE (var)));
3755 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3756 return NULL;
3758 if (!check_bool_pattern (var, vinfo, bool_stmts))
3759 return NULL;
3761 rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3763 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3764 pattern_stmt
3765 = gimple_build_assign (lhs, COND_EXPR,
3766 build2 (NE_EXPR, boolean_type_node,
3767 rhs, build_int_cst (type, 0)),
3768 gimple_assign_rhs2 (last_stmt),
3769 gimple_assign_rhs3 (last_stmt));
3770 *type_out = vectype;
3771 *type_in = vectype;
3772 stmts->safe_push (last_stmt);
3773 if (dump_enabled_p ())
3774 dump_printf_loc (MSG_NOTE, vect_location,
3775 "vect_recog_bool_pattern: detected:\n");
3777 return pattern_stmt;
3779 else if (rhs_code == SSA_NAME
3780 && STMT_VINFO_DATA_REF (stmt_vinfo))
3782 stmt_vec_info pattern_stmt_info;
3783 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3784 gcc_assert (vectype != NULL_TREE);
3785 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3786 return NULL;
3788 if (check_bool_pattern (var, vinfo, bool_stmts))
3789 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3790 else
3792 tree type = search_type_for_mask (var, vinfo);
3793 tree cst0, cst1, new_vectype;
3795 if (!type)
3796 return NULL;
3798 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3799 type = TREE_TYPE (vectype);
3801 cst0 = build_int_cst (type, 0);
3802 cst1 = build_int_cst (type, 1);
3803 new_vectype = get_vectype_for_scalar_type (type);
3805 rhs = vect_recog_temp_ssa_var (type, NULL);
3806 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3808 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3809 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3810 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3811 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3814 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3815 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3817 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3818 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3819 append_pattern_def_seq (stmt_vinfo, cast_stmt);
3820 rhs = rhs2;
3822 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3823 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3824 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3825 STMT_VINFO_DATA_REF (pattern_stmt_info)
3826 = STMT_VINFO_DATA_REF (stmt_vinfo);
3827 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3828 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3829 *type_out = vectype;
3830 *type_in = vectype;
3831 stmts->safe_push (last_stmt);
3832 if (dump_enabled_p ())
3833 dump_printf_loc (MSG_NOTE, vect_location,
3834 "vect_recog_bool_pattern: detected:\n");
3835 return pattern_stmt;
3837 else
3838 return NULL;
3842 /* A helper for vect_recog_mask_conversion_pattern. Build
3843 conversion of MASK to a type suitable for masking VECTYPE.
3844 The built statement gets the required vectype and is appended to
3845 the pattern definition sequence of STMT_VINFO.
3847 Return converted mask. */
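/* A rough sketch of the statement this emits (types are illustrative):

     tmp_1 = (_Bool) MASK;

   where TMP_1's stmt_vec_info is given the truth vector type matching
   VECTYPE, so the conversion is later vectorized as a mask conversion.  */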
3849 static tree
3850 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3851 vec_info *vinfo)
3853 gimple *stmt;
3854 tree masktype, tmp;
3855 stmt_vec_info new_stmt_info;
3857 masktype = build_same_sized_truth_vector_type (vectype);
3858 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3859 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3860 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3861 set_vinfo_for_stmt (stmt, new_stmt_info);
3862 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3863 append_pattern_def_seq (stmt_vinfo, stmt);
3865 return tmp;
3869 /* Function vect_recog_mask_conversion_pattern
3871 Try to find statements which require boolean type
3872 conversion. Additional conversion statements are
3873 added to handle such cases. For example:
3875 bool m_1, m_2, m_3;
3876 int i_4, i_5;
3877 double d_6, d_7;
3878 char c_1, c_2, c_3;
3880 S1 m_1 = i_4 > i_5;
3881 S2 m_2 = d_6 < d_7;
3882 S3 m_3 = m_1 & m_2;
3883 S4 c_1 = m_3 ? c_2 : c_3;
3885 Will be transformed into:
3887 S1 m_1 = i_4 > i_5;
3888 S2 m_2 = d_6 < d_7;
3889 S3'' m_2' = (_Bool[bitsize=32])m_2
3890 S3' m_3' = m_1 & m_2';
3891 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3892 S4' c_1' = m_3'' ? c_2 : c_3; */
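/* For illustration only, a hypothetical source loop that gives rise to
   the sequence above (all names are made up):

     for (i = 0; i < n; i++)
       c[i] = (i4[i] > i5[i] && d6[i] < d7[i]) ? c2[i] : c3[i];

   Here the int comparison produces a mask with 32-bit elements and the
   double comparison a mask with 64-bit elements, so conversions have to
   be inserted before the masks can be combined and used to select the
   char values.  */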
3894 static gimple *
3895 vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3896 tree *type_out)
3898 gimple *last_stmt = stmts->pop ();
3899 enum tree_code rhs_code;
3900 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3901 tree vectype1, vectype2;
3902 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3903 stmt_vec_info pattern_stmt_info;
3904 vec_info *vinfo = stmt_vinfo->vinfo;
3906 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3907 if (is_gimple_call (last_stmt)
3908 && gimple_call_internal_p (last_stmt)
3909 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3910 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3912 gcall *pattern_stmt;
3913 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3915 if (load)
3917 lhs = gimple_call_lhs (last_stmt);
3918 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3920 else
3922 rhs2 = gimple_call_arg (last_stmt, 3);
3923 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3926 rhs1 = gimple_call_arg (last_stmt, 2);
3927 rhs1_type = search_type_for_mask (rhs1, vinfo);
3928 if (!rhs1_type)
3929 return NULL;
3930 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3932 if (!vectype1 || !vectype2
3933 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3934 TYPE_VECTOR_SUBPARTS (vectype2)))
3935 return NULL;
3937 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3939 if (load)
3941 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3942 pattern_stmt
3943 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3944 gimple_call_arg (last_stmt, 0),
3945 gimple_call_arg (last_stmt, 1),
3946 tmp);
3947 gimple_call_set_lhs (pattern_stmt, lhs);
3949 else
3950 pattern_stmt
3951 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3952 gimple_call_arg (last_stmt, 0),
3953 gimple_call_arg (last_stmt, 1),
3954 tmp,
3955 gimple_call_arg (last_stmt, 3));
3957 gimple_call_set_nothrow (pattern_stmt, true);
3959 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3960 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3961 STMT_VINFO_DATA_REF (pattern_stmt_info)
3962 = STMT_VINFO_DATA_REF (stmt_vinfo);
3963 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3964 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3966 *type_out = vectype1;
3967 *type_in = vectype1;
3968 stmts->safe_push (last_stmt);
3969 if (dump_enabled_p ())
3970 dump_printf_loc (MSG_NOTE, vect_location,
3971 "vect_recog_mask_conversion_pattern: detected:\n");
3973 return pattern_stmt;
3976 if (!is_gimple_assign (last_stmt))
3977 return NULL;
3979 gimple *pattern_stmt;
3980 lhs = gimple_assign_lhs (last_stmt);
3981 rhs1 = gimple_assign_rhs1 (last_stmt);
3982 rhs_code = gimple_assign_rhs_code (last_stmt);
3984 /* Check for cond expression requiring mask conversion. */
3985 if (rhs_code == COND_EXPR)
3987 /* vect_recog_mixed_size_cond_pattern could apply.
3988 Do nothing then. */
3989 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3990 return NULL;
3992 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3994 if (TREE_CODE (rhs1) == SSA_NAME)
3996 rhs1_type = search_type_for_mask (rhs1, vinfo);
3997 if (!rhs1_type)
3998 return NULL;
4000 else if (COMPARISON_CLASS_P (rhs1))
4002 /* Check whether we're comparing scalar booleans and (if so)
4003 whether a better mask type exists than the mask associated
4004 with boolean-sized elements. This avoids unnecessary packs
4005 and unpacks if the booleans are set from comparisons of
4006 wider types. E.g. in:
4008 int x1, x2, x3, x4, y1, y2;
4010 bool b1 = (x1 == x2);
4011 bool b2 = (x3 == x4);
4012 ... = b1 == b2 ? y1 : y2;
4014 it is better for b1 and b2 to use the mask type associated
4015 with int elements rather than bool (byte) elements. */
4016 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4017 if (!rhs1_type)
4018 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4020 else
4021 return NULL;
4023 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4025 if (!vectype1 || !vectype2)
4026 return NULL;
4028 /* Continue if a conversion is needed. Also continue if we have
4029 a comparison whose vector type would normally be different from
4030 VECTYPE2 when considered in isolation. In that case we'll
4031 replace the comparison with an SSA name (so that we can record
4032 its vector type) and behave as though the comparison was an SSA
4033 name from the outset. */
4034 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4035 TYPE_VECTOR_SUBPARTS (vectype2))
4036 && (TREE_CODE (rhs1) == SSA_NAME
4037 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4038 return NULL;
4040 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4041 in place; we can handle it in vectorizable_condition. This avoids
4042 unnecessary promotion stmts and an increased vectorization factor. */
4043 if (COMPARISON_CLASS_P (rhs1)
4044 && INTEGRAL_TYPE_P (rhs1_type)
4045 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4046 TYPE_VECTOR_SUBPARTS (vectype2)))
4048 gimple *dummy;
4049 enum vect_def_type dt;
4050 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
4051 &dummy, &dt)
4052 && dt == vect_external_def
4053 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
4054 &dummy, &dt)
4055 && (dt == vect_external_def
4056 || dt == vect_constant_def))
4058 tree wide_scalar_type = build_nonstandard_integer_type
4059 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4060 TYPE_UNSIGNED (rhs1_type));
4061 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4062 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4063 return NULL;
4067 /* If rhs1 is a comparison we need to move it into a
4068 separate statement. */
4069 if (TREE_CODE (rhs1) != SSA_NAME)
4071 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4072 pattern_stmt = gimple_build_assign (tmp, rhs1);
4073 rhs1 = tmp;
4075 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4076 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4077 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4078 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4081 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4082 TYPE_VECTOR_SUBPARTS (vectype2)))
4083 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4084 else
4085 tmp = rhs1;
4087 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4088 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4089 gimple_assign_rhs2 (last_stmt),
4090 gimple_assign_rhs3 (last_stmt));
4092 *type_out = vectype1;
4093 *type_in = vectype1;
4094 stmts->safe_push (last_stmt);
4095 if (dump_enabled_p ())
4096 dump_printf_loc (MSG_NOTE, vect_location,
4097 "vect_recog_mask_conversion_pattern: detected:\n");
4099 return pattern_stmt;
4102 /* Now check for binary boolean operations requiring conversion for
4103 one of the operands. */
4104 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4105 return NULL;
4107 if (rhs_code != BIT_IOR_EXPR
4108 && rhs_code != BIT_XOR_EXPR
4109 && rhs_code != BIT_AND_EXPR
4110 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4111 return NULL;
4113 rhs2 = gimple_assign_rhs2 (last_stmt);
4115 rhs1_type = search_type_for_mask (rhs1, vinfo);
4116 rhs2_type = search_type_for_mask (rhs2, vinfo);
4118 if (!rhs1_type || !rhs2_type
4119 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4120 return NULL;
4122 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4124 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4125 if (!vectype1)
4126 return NULL;
4127 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4129 else
4131 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4132 if (!vectype1)
4133 return NULL;
4134 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4137 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4138 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4140 *type_out = vectype1;
4141 *type_in = vectype1;
4142 stmts->safe_push (last_stmt);
4143 if (dump_enabled_p ())
4144 dump_printf_loc (MSG_NOTE, vect_location,
4145 "vect_recog_mask_conversion_pattern: detected:\n");
4147 return pattern_stmt;
4150 /* STMT is a load or store. If the load or store is conditional, return
4151 the boolean condition under which it occurs, otherwise return null. */
4153 static tree
4154 vect_get_load_store_mask (gimple *stmt)
4156 if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4158 gcc_assert (gimple_assign_single_p (def_assign));
4159 return NULL_TREE;
4162 if (gcall *def_call = dyn_cast <gcall *> (stmt))
4164 internal_fn ifn = gimple_call_internal_fn (def_call);
4165 int mask_index = internal_fn_mask_index (ifn);
4166 return gimple_call_arg (def_call, mask_index);
4169 gcc_unreachable ();
4172 /* Return the scalar offset type that an internal gather/scatter function
4173 should use. GS_INFO describes the gather/scatter operation. */
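/* For example (illustrative only): if the elements being gathered are
   64 bits wide but the recorded offset is a 32-bit int, this returns a
   64-bit signed integer type, so the offset is later sign-extended to
   the element width before being passed to the internal function.  */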
4175 static tree
4176 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4178 tree offset_type = TREE_TYPE (gs_info->offset);
4179 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4181 /* Enforced by vect_check_gather_scatter. */
4182 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4183 gcc_assert (element_bits >= offset_bits);
4185 /* If the offset is narrower than the elements, extend it according
4186 to its sign. */
4187 if (element_bits > offset_bits)
4188 return build_nonstandard_integer_type (element_bits,
4189 TYPE_UNSIGNED (offset_type));
4191 return offset_type;
4194 /* Return MASK if MASK is suitable for masking an operation on vectors
4195 of type VECTYPE, otherwise convert it into such a form and return
4196 the result. Associate any conversion statements with STMT_INFO's
4197 pattern. */
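/* For example (illustrative only): if MASK comes from a comparison of
   chars but VECTYPE is a vector of ints, the two mask types have a
   different number of elements, so a conversion statement is added via
   build_mask_conversion; otherwise MASK is returned unchanged.  */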
4199 static tree
4200 vect_convert_mask_for_vectype (tree mask, tree vectype,
4201 stmt_vec_info stmt_info, vec_info *vinfo)
4203 tree mask_type = search_type_for_mask (mask, vinfo);
4204 if (mask_type)
4206 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4207 if (mask_vectype
4208 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4209 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4210 mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4212 return mask;
4215 /* Return the equivalent of:
4217 fold_convert (TYPE, VALUE)
4219 with the expectation that the operation will be vectorized.
4220 If new statements are needed, add them as pattern statements
4221 to STMT_INFO. */
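/* E.g. (illustrative only), converting a 32-bit index i_1 to a 64-bit
   type would add the pattern statement

     patt_2 = (long int) i_1;

   and return patt_2; if VALUE already has a type compatible with TYPE
   it is returned unchanged.  */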
4223 static tree
4224 vect_add_conversion_to_patterm (tree type, tree value,
4225 stmt_vec_info stmt_info,
4226 vec_info *vinfo)
4228 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4229 return value;
4231 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4232 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4233 stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4234 set_vinfo_for_stmt (conversion, new_stmt_info);
4235 STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4236 append_pattern_def_seq (stmt_info, conversion);
4237 return new_value;
4240 /* Try to convert STMT into a call to a gather load or scatter store
4241 internal function. Return the final statement on success and set
4242 *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4244 This function only handles gathers and scatters that were recognized
4245 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
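/* As a hypothetical illustration, assuming the target provides
   IFN_GATHER_LOAD, a recognized gather such as

     for (i = 0; i < n; i++)
       sum += data[idx[i]];

   would be replaced by a pattern statement along the lines of

     patt_3 = .GATHER_LOAD (&data, idx_w, 4);

   where idx_w is the offset widened to the element width and 4 is the
   byte scale of the access.  */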
4247 static gimple *
4248 vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4249 tree *type_in, tree *type_out)
4251 /* Currently we only support this for loop vectorization. */
4252 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4253 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4254 if (!loop_vinfo)
4255 return NULL;
4257 /* Make sure that we're looking at a gather load or scatter store. */
4258 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4259 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4260 return NULL;
4262 /* Get the boolean that controls whether the load or store happens.
4263 This is null if the operation is unconditional. */
4264 tree mask = vect_get_load_store_mask (stmt);
4266 /* Make sure that the target supports an appropriate internal
4267 function for the gather/scatter operation. */
4268 gather_scatter_info gs_info;
4269 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4270 || gs_info.decl)
4271 return NULL;
4273 /* Convert the mask to the right form. */
4274 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4275 if (mask)
4276 mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4277 loop_vinfo);
4279 /* Get the invariant base and non-invariant offset, converting the
4280 latter to the same width as the vector elements. */
4281 tree base = gs_info.base;
4282 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4283 tree offset = vect_add_conversion_to_patterm (offset_type, gs_info.offset,
4284 last_stmt_info, loop_vinfo);
4286 /* Build the new pattern statement. */
4287 tree scale = size_int (gs_info.scale);
4288 gcall *pattern_stmt;
4289 if (DR_IS_READ (dr))
4291 if (mask != NULL)
4292 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4293 offset, scale, mask);
4294 else
4295 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4296 offset, scale);
4297 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4298 gimple_call_set_lhs (pattern_stmt, load_lhs);
4300 else
4302 tree rhs = vect_get_store_rhs (stmt);
4303 if (mask != NULL)
4304 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4305 base, offset, scale, rhs,
4306 mask);
4307 else
4308 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4309 base, offset, scale, rhs);
4311 gimple_call_set_nothrow (pattern_stmt, true);
4313 /* Copy across relevant vectorization info and associate DR with the
4314 new pattern statement instead of the original statement. */
4315 stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4316 loop_vinfo);
4317 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4318 STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4319 STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4320 = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4321 STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4322 = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4324 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4325 *type_out = vectype;
4326 *type_in = vectype;
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_NOTE, vect_location,
4330 "gather/scatter pattern detected:\n");
4332 return pattern_stmt;
4335 /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4337 static gimple *
4338 vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
4339 tree *type_out)
4341 gimple *last_stmt = stmts->pop ();
4342 stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4343 gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4344 last_stmt_info,
4345 type_in, type_out);
4346 if (pattern_stmt)
4347 stmts->safe_push (last_stmt);
4348 return pattern_stmt;
4351 /* Mark statements that are involved in a pattern. */
4353 static inline void
4354 vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4355 tree pattern_vectype)
4357 stmt_vec_info pattern_stmt_info, def_stmt_info;
4358 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4359 vec_info *vinfo = orig_stmt_info->vinfo;
4360 gimple *def_stmt;
4362 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4363 if (pattern_stmt_info == NULL)
4365 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4366 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4368 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4370 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4371 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4372 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4373 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4374 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4375 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4376 if (gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info))
4377 for (gimple_stmt_iterator si = gsi_start (def_seq);
4378 !gsi_end_p (si); gsi_next (&si))
4380 def_stmt = gsi_stmt (si);
4381 def_stmt_info = vinfo_for_stmt (def_stmt);
4382 if (def_stmt_info == NULL)
4384 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4385 set_vinfo_for_stmt (def_stmt, def_stmt_info);
4387 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4388 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4389 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4390 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4391 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4395 /* Function vect_pattern_recog_1
4397 Input:
4398 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4399 computation pattern.
4400 STMT: A stmt from which the pattern search should start.
4402 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4403 expression that computes the same functionality and can be used to
4404 replace the sequence of stmts that are involved in the pattern.
4406 Output:
4407 This function checks if the expression returned by PATTERN_RECOG_FUNC is
4408 supported in vector form by the target. We use 'TYPE_IN' to obtain the
4409 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4410 indicates that target support has already been checked by PATTERN_RECOG_FUNC.
4411 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
4412 fits the available target pattern.
4414 This function also does some bookkeeping, as explained in the documentation
4415 for vect_pattern_recog. */
4417 static bool
4418 vect_pattern_recog_1 (vect_recog_func *recog_func,
4419 gimple_stmt_iterator si,
4420 vec<gimple *> *stmts_to_replace)
4422 gimple *stmt = gsi_stmt (si), *pattern_stmt;
4423 stmt_vec_info stmt_info;
4424 loop_vec_info loop_vinfo;
4425 tree pattern_vectype;
4426 tree type_in, type_out;
4427 enum tree_code code;
4428 int i;
4430 stmts_to_replace->truncate (0);
4431 stmts_to_replace->quick_push (stmt);
4432 pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
4433 if (!pattern_stmt)
4435 /* Clear related stmt info that analysis might have noted for
4436 stmts that were to be replaced. */
4437 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4438 && (unsigned) i < stmts_to_replace->length ();
4439 i++)
4441 stmt_info = vinfo_for_stmt (stmt);
4442 STMT_VINFO_RELATED_STMT (stmt_info) = NULL;
4444 return false;
4447 stmt = stmts_to_replace->last ();
4448 stmt_info = vinfo_for_stmt (stmt);
4449 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4451 if (VECTOR_BOOLEAN_TYPE_P (type_in)
4452 || VECTOR_TYPE_P (type_in))
4454 /* No need to check target support (already checked by the pattern
4455 recognition function). */
4456 pattern_vectype = type_out ? type_out : type_in;
4458 else
4460 /* Check target support. */
4461 type_in = get_vectype_for_scalar_type (type_in);
4462 if (!type_in)
4463 return false;
4464 if (type_out)
4465 type_out = get_vectype_for_scalar_type (type_out);
4466 else
4467 type_out = type_in;
4468 if (!type_out)
4469 return false;
4470 pattern_vectype = type_out;
4472 if (is_gimple_assign (pattern_stmt))
4474 enum insn_code icode;
4475 code = gimple_assign_rhs_code (pattern_stmt);
4476 optab optab = optab_for_tree_code (code, type_in, optab_default);
4477 machine_mode vec_mode = TYPE_MODE (type_in);
4478 if (!optab
4479 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
4480 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4481 return false;
4483 else
4484 gcc_assert (is_gimple_call (pattern_stmt));
4487 /* Found a vectorizable pattern. */
4488 if (dump_enabled_p ())
4490 dump_printf_loc (MSG_NOTE, vect_location,
4491 "%s pattern recognized: ", recog_func->name);
4492 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4495 /* Mark the stmts that are involved in the pattern. */
4496 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4498 /* Patterns cannot be vectorized using SLP, because they change the order of
4499 computation. */
4500 if (loop_vinfo)
4502 unsigned ix, ix2;
4503 gimple **elem_ptr;
4504 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4505 elem_ptr, *elem_ptr == stmt);
4508 /* It is possible that additional pattern stmts are created and inserted in
4509 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4510 relevant statements. */
4511 for (i = 0; stmts_to_replace->iterate (i, &stmt)
4512 && (unsigned) i < (stmts_to_replace->length () - 1);
4513 i++)
4515 stmt_info = vinfo_for_stmt (stmt);
4516 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4517 if (dump_enabled_p ())
4519 dump_printf_loc (MSG_NOTE, vect_location,
4520 "additional pattern stmt: ");
4521 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4524 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4527 return true;
4531 /* Function vect_pattern_recog
4533 Input:
4534 VINFO - a vec_info for the loop or basic-block region in which we want
4535 to look for computation idioms.
4537 Output - for each computation idiom that is detected we create a new stmt
4538 that provides the same functionality and that can be vectorized. We
4539 also record some information in the struct_stmt_info of the relevant
4540 stmts, as explained below:
4542 At the entry to this function we have the following stmts, with the
4543 following initial value in the STMT_VINFO fields:
4545 stmt in_pattern_p related_stmt vec_stmt
4546 S1: a_i = .... - - -
4547 S2: a_2 = ..use(a_i).. - - -
4548 S3: a_1 = ..use(a_2).. - - -
4549 S4: a_0 = ..use(a_1).. - - -
4550 S5: ... = ..use(a_0).. - - -
4552 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4553 represented by a single stmt. We then:
4554 - create a new stmt S6 equivalent to the pattern (the stmt is not
4555 inserted into the code)
4556 - fill in the STMT_VINFO fields as follows:
4558 in_pattern_p related_stmt vec_stmt
4559 S1: a_i = .... - - -
4560 S2: a_2 = ..use(a_i).. - - -
4561 S3: a_1 = ..use(a_2).. - - -
4562 S4: a_0 = ..use(a_1).. true S6 -
4563 '---> S6: a_new = .... - S4 -
4564 S5: ... = ..use(a_0).. - - -
4566 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4567 to each other through the RELATED_STMT field).
4569 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4570 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4571 remain irrelevant unless used by stmts other than S4.
4573 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4574 (because they are marked as irrelevant). It will vectorize S6, and record
4575 a pointer to the new vector stmt VS6 from S6 (as usual).
4576 S4 will be skipped, and S5 will be vectorized as usual:
4578 in_pattern_p related_stmt vec_stmt
4579 S1: a_i = .... - - -
4580 S2: a_2 = ..use(a_i).. - - -
4581 S3: a_1 = ..use(a_2).. - - -
4582 > VS6: va_new = .... - - -
4583 S4: a_0 = ..use(a_1).. true S6 VS6
4584 '---> S6: a_new = .... - S4 VS6
4585 > VS5: ... = ..vuse(va_new).. - - -
4586 S5: ... = ..use(a_0).. - - -
4588 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4589 elsewhere), and we'll end up with:
4591 VS6: va_new = ....
4592 VS5: ... = ..vuse(va_new)..
4594 In case of more than one pattern statement, e.g., widen-mult with
4595 intermediate type:
4597 S1 a_t = ;
4598 S2 a_T = (TYPE) a_t;
4599 '--> S3: a_it = (interm_type) a_t;
4600 S4 prod_T = a_T * CONST;
4601 '--> S5: prod_T' = a_it w* CONST;
4603 there may be other users of a_T outside the pattern. In that case S2 will
4604 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4605 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4606 be recorded in S3. */
4608 void
4609 vect_pattern_recog (vec_info *vinfo)
4611 struct loop *loop;
4612 basic_block *bbs;
4613 unsigned int nbbs;
4614 gimple_stmt_iterator si;
4615 unsigned int i, j;
4616 auto_vec<gimple *, 1> stmts_to_replace;
4618 DUMP_VECT_SCOPE ("vect_pattern_recog");
4620 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4622 loop = LOOP_VINFO_LOOP (loop_vinfo);
4623 bbs = LOOP_VINFO_BBS (loop_vinfo);
4624 nbbs = loop->num_nodes;
4626 /* Scan through the loop stmts, applying the pattern recognition
4627 functions starting at each stmt visited: */
4628 for (i = 0; i < nbbs; i++)
4630 basic_block bb = bbs[i];
4631 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4633 gimple *stmt = gsi_stmt (si);
4634 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4635 if (stmt_info && STMT_VINFO_IN_PATTERN_P (stmt_info))
4636 continue;
4637 /* Scan over all generic vect_recog_xxx_pattern functions. */
4638 for (j = 0; j < NUM_PATTERNS; j++)
4639 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4640 &stmts_to_replace))
4641 break;
4645 else
4647 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4648 for (si = bb_vinfo->region_begin;
4649 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4651 gimple *stmt = gsi_stmt (si);
4652 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4653 if (stmt_info
4654 && (!STMT_VINFO_VECTORIZABLE (stmt_info)
4655 || STMT_VINFO_IN_PATTERN_P (stmt_info)))
4656 continue;
4658 /* Scan over all generic vect_recog_xxx_pattern functions. */
4659 for (j = 0; j < NUM_PATTERNS; j++)
4660 if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4661 &stmts_to_replace))
4662 break;