gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
52 /* Return true if we have a useful VR_RANGE range for VAR, storing it
53 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
55 static bool
56 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
58 value_range_kind vr_type = get_range_info (var, min_value, max_value);
59 wide_int nonzero = get_nonzero_bits (var);
60 signop sgn = TYPE_SIGN (TREE_TYPE (var));
61 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
62 nonzero, sgn) == VR_RANGE)
64 if (dump_enabled_p ())
66 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
67 dump_printf (MSG_NOTE, " has range [");
68 dump_hex (MSG_NOTE, *min_value);
69 dump_printf (MSG_NOTE, ", ");
70 dump_hex (MSG_NOTE, *max_value);
71 dump_printf (MSG_NOTE, "]\n");
73 return true;
75 else
77 if (dump_enabled_p ())
79 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
80 dump_printf (MSG_NOTE, " has no range info\n");
82 return false;
86 /* Report that we've found an instance of pattern PATTERN in
87 statement STMT. */
89 static void
90 vect_pattern_detected (const char *name, gimple *stmt)
92 if (dump_enabled_p ())
93 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
96 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
97 return the pattern statement's stmt_vec_info. Set its vector type to
98 VECTYPE if it doesn't have one already. */
100 static stmt_vec_info
101 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
102 stmt_vec_info orig_stmt_info, tree vectype)
104 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
105 if (pattern_stmt_info == NULL)
106 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
107 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
109 pattern_stmt_info->pattern_stmt_p = true;
110 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
111 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
112 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
113 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
115 gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
116 == vect_use_mask_type_p (orig_stmt_info));
117 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
118 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
120 return pattern_stmt_info;
123 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
124 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
125 have one already. */
127 static void
128 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
129 stmt_vec_info orig_stmt_info, tree vectype)
131 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
132 STMT_VINFO_RELATED_STMT (orig_stmt_info)
133 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
136 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
137 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
138 be different from the vector type of the final pattern statement.
139 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
140 from which it was derived. */
142 static inline void
143 append_pattern_def_seq (vec_info *vinfo,
144 stmt_vec_info stmt_info, gimple *new_stmt,
145 tree vectype = NULL_TREE,
146 tree scalar_type_for_mask = NULL_TREE)
148 gcc_assert (!scalar_type_for_mask
149 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
150 if (vectype)
152 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
153 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
154 if (scalar_type_for_mask)
155 new_stmt_info->mask_precision
156 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
158 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
159 new_stmt);
162 /* The caller wants to perform new operations on vect_external variable
163 VAR, so that the result of the operations would also be vect_external.
164 Return the edge on which the operations can be performed, if one exists.
165 Return null if the operations should instead be treated as part of
166 the pattern that needs them. */
168 static edge
169 vect_get_external_def_edge (vec_info *vinfo, tree var)
171 edge e = NULL;
172 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
174 e = loop_preheader_edge (loop_vinfo->loop);
175 if (!SSA_NAME_IS_DEFAULT_DEF (var))
177 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
178 if (bb == NULL
179 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
180 e = NULL;
183 return e;
186 /* Return true if the target supports a vector version of CODE,
187 where CODE is known to map to a direct optab. ITYPE specifies
188 the type of (some of) the scalar inputs and OTYPE specifies the
189 type of the scalar result.
191 If CODE allows the inputs and outputs to have different types
192 (such as for WIDEN_SUM_EXPR), it is the input mode rather
193 than the output mode that determines the appropriate target pattern.
194 Operand 0 of the target pattern then specifies the mode that the output
195 must have.
197 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
198 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
199 is nonnull. */
201 static bool
202 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
203 tree itype, tree *vecotype_out,
204 tree *vecitype_out = NULL)
206 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
207 if (!vecitype)
208 return false;
210 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
211 if (!vecotype)
212 return false;
214 optab optab = optab_for_tree_code (code, vecitype, optab_default);
215 if (!optab)
216 return false;
218 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
219 if (icode == CODE_FOR_nothing
220 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
221 return false;
223 *vecotype_out = vecotype;
224 if (vecitype_out)
225 *vecitype_out = vecitype;
226 return true;
229 /* Round bit precision PRECISION up to a full element. */
231 static unsigned int
232 vect_element_precision (unsigned int precision)
234 precision = 1 << ceil_log2 (precision);
235 return MAX (precision, BITS_PER_UNIT);
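/* For example (illustrative values only): a precision of 17 bits rounds up
   to 1 << ceil_log2 (17) == 32, while a precision of 3 bits rounds up to 4
   and is then clamped to BITS_PER_UNIT, giving 8.  */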
238 /* If OP is defined by a statement that's being considered for vectorization,
239 return information about that statement, otherwise return NULL. */
241 static stmt_vec_info
242 vect_get_internal_def (vec_info *vinfo, tree op)
244 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
245 if (def_stmt_info
246 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
247 return def_stmt_info;
248 return NULL;
251 /* Check whether NAME, an ssa-name used in STMT_VINFO,
252 is a result of a type promotion, such that:
253 DEF_STMT: NAME = NOP (name0)
254 If CHECK_SIGN is TRUE, check that either both types are signed or both are
255 unsigned. */
257 static bool
258 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
259 tree *orig_type, gimple **def_stmt, bool *promotion)
261 tree type = TREE_TYPE (name);
262 tree oprnd0;
263 enum vect_def_type dt;
265 stmt_vec_info def_stmt_info;
266 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
267 return false;
269 if (dt != vect_internal_def
270 && dt != vect_external_def && dt != vect_constant_def)
271 return false;
273 if (!*def_stmt)
274 return false;
276 if (!is_gimple_assign (*def_stmt))
277 return false;
279 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
280 return false;
282 oprnd0 = gimple_assign_rhs1 (*def_stmt);
284 *orig_type = TREE_TYPE (oprnd0);
285 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
286 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
287 return false;
289 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
290 *promotion = true;
291 else
292 *promotion = false;
294 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
295 return false;
297 return true;
300 /* Holds information about an input operand after some sign changes
301 and type promotions have been peeled away. */
302 class vect_unpromoted_value {
303 public:
304 vect_unpromoted_value ();
306 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
308 /* The value obtained after peeling away zero or more casts. */
309 tree op;
311 /* The type of OP. */
312 tree type;
314 /* The definition type of OP. */
315 vect_def_type dt;
317 /* If OP is the result of peeling at least one cast, and if the cast
318 of OP itself is a vectorizable statement, CASTER identifies that
319 statement, otherwise it is null. */
320 stmt_vec_info caster;
323 inline vect_unpromoted_value::vect_unpromoted_value ()
324 : op (NULL_TREE),
325 type (NULL_TREE),
326 dt (vect_uninitialized_def),
327 caster (NULL)
331 /* Set the operand to OP_IN, its definition type to DT_IN, and the
332 statement that casts it to CASTER_IN. */
334 inline void
335 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
336 stmt_vec_info caster_in)
338 op = op_in;
339 type = TREE_TYPE (op);
340 dt = dt_in;
341 caster = caster_in;
344 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
345 to reach some vectorizable inner operand OP', continuing as long as it
346 is possible to convert OP' back to OP using a possible sign change
347 followed by a possible promotion P. Return this OP', or null if OP is
348 not a vectorizable SSA name. If there is a promotion P, describe its
349 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
350 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
351 have more than one user.
353 A successful return means that it is possible to go from OP' to OP
354 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
355 whereas the cast from UNPROM to OP might be a promotion, a sign
356 change, or a nop.
358 E.g. say we have:
360 signed short *ptr = ...;
361 signed short C = *ptr;
362 unsigned short B = (unsigned short) C; // sign change
363 signed int A = (signed int) B; // unsigned promotion
364 ...possible other uses of A...
365 unsigned int OP = (unsigned int) A; // sign change
367 In this case it's possible to go directly from C to OP using:
369 OP = (unsigned int) (unsigned short) C;
370 +------------+ +--------------+
371 promotion sign change
373 so OP' would be C. The input to the promotion is B, so UNPROM
374 would describe B. */
376 static tree
377 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
378 vect_unpromoted_value *unprom,
379 bool *single_use_p = NULL)
381 tree res = NULL_TREE;
382 tree op_type = TREE_TYPE (op);
383 unsigned int orig_precision = TYPE_PRECISION (op_type);
384 unsigned int min_precision = orig_precision;
385 stmt_vec_info caster = NULL;
386 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
388 /* See whether OP is simple enough to vectorize. */
389 stmt_vec_info def_stmt_info;
390 gimple *def_stmt;
391 vect_def_type dt;
392 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
393 break;
395 /* If OP is the input of a demotion, skip over it to see whether
396 OP is itself the result of a promotion. If so, the combined
397 effect of the promotion and the demotion might fit the required
398 pattern, otherwise neither operation fits.
400 This copes with cases such as the result of an arithmetic
401 operation being truncated before being stored, and where that
402 arithmetic operation has been recognized as an over-widened one. */
403 if (TYPE_PRECISION (op_type) <= min_precision)
405 /* Use OP as the UNPROM described above if we haven't yet
406 found a promotion, or if using the new input preserves the
407 sign of the previous promotion. */
408 if (!res
409 || TYPE_PRECISION (unprom->type) == orig_precision
410 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
412 unprom->set_op (op, dt, caster);
413 min_precision = TYPE_PRECISION (op_type);
415 /* Stop if we've already seen a promotion and if this
416 conversion does more than change the sign. */
417 else if (TYPE_PRECISION (op_type)
418 != TYPE_PRECISION (unprom->type))
419 break;
421 /* The sequence now extends to OP. */
422 res = op;
425 /* See whether OP is defined by a cast. Record it as CASTER if
426 the cast is potentially vectorizable. */
427 if (!def_stmt)
428 break;
429 caster = def_stmt_info;
431 /* Ignore pattern statements, since we don't link uses for them. */
432 if (caster
433 && single_use_p
434 && !STMT_VINFO_RELATED_STMT (caster)
435 && !has_single_use (res))
436 *single_use_p = false;
438 gassign *assign = dyn_cast <gassign *> (def_stmt);
439 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
440 break;
442 /* Continue with the input to the cast. */
443 op = gimple_assign_rhs1 (def_stmt);
444 op_type = TREE_TYPE (op);
446 return res;
449 /* OP is an integer operand to an operation that returns TYPE, and we
450 want to treat the operation as a widening one. So far we can treat
451 it as widening from *COMMON_TYPE.
453 Return true if OP is suitable for such a widening operation,
454 either widening from *COMMON_TYPE or from some supertype of it.
455 Update *COMMON_TYPE to the supertype in the latter case.
457 SHIFT_P is true if OP is a shift amount. */
459 static bool
460 vect_joust_widened_integer (tree type, bool shift_p, tree op,
461 tree *common_type)
463 /* Calculate the minimum precision required by OP, without changing
464 the sign of either operand. */
465 unsigned int precision;
466 if (shift_p)
468 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
469 return false;
470 precision = TREE_INT_CST_LOW (op);
472 else
474 precision = wi::min_precision (wi::to_widest (op),
475 TYPE_SIGN (*common_type));
476 if (precision * 2 > TYPE_PRECISION (type))
477 return false;
480 /* If OP requires a wider type, switch to that type. The checks
481 above ensure that this is still narrower than the result. */
482 precision = vect_element_precision (precision);
483 if (TYPE_PRECISION (*common_type) < precision)
484 *common_type = build_nonstandard_integer_type
485 (precision, TYPE_UNSIGNED (*common_type));
486 return true;
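/* A worked example (hypothetical values): with TYPE = int, *COMMON_TYPE
   initially an 8-bit unsigned type and OP the integer constant 1000 (not a
   shift amount), OP needs 10 bits, 10 * 2 <= 32 holds, and the element
   precision of 10 is 16, so *COMMON_TYPE is widened to a 16-bit unsigned
   type and the function returns true.  */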
489 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
490 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
492 static bool
493 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
495 if (types_compatible_p (*common_type, new_type))
496 return true;
498 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
499 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
500 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
501 return true;
503 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
504 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
505 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
507 *common_type = new_type;
508 return true;
511 /* We have mismatched signs, with the signed type being
512 no wider than the unsigned type. In this case we need
513 a wider signed type. */
514 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
515 TYPE_PRECISION (new_type));
516 precision *= 2;
517 if (precision * 2 > TYPE_PRECISION (type))
518 return false;
520 *common_type = build_nonstandard_integer_type (precision, false);
521 return true;
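/* A worked example (hypothetical values): with TYPE = int, *COMMON_TYPE an
   unsigned 8-bit type and NEW_TYPE a signed 8-bit type, neither input type
   can hold all values of the other, so *COMMON_TYPE becomes a signed
   16-bit type, which is still narrower than TYPE.  */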
524 /* Check whether STMT_INFO can be viewed as a tree of integer operations
525 in which each node either performs CODE or WIDENED_CODE, and where
526 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
527 specifies the maximum number of leaf operands. SHIFT_P says whether
528 CODE and WIDENED_CODE are some sort of shift.
530 If STMT_INFO is such a tree, return the number of leaf operands
531 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
532 to a type that (a) is narrower than the result of STMT_INFO and
533 (b) can hold all leaf operand values.
535 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
536 exists. */
538 static unsigned int
539 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
540 tree_code widened_code, bool shift_p,
541 unsigned int max_nops,
542 vect_unpromoted_value *unprom, tree *common_type)
544 /* Check for an integer operation with the right code. */
545 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
546 if (!assign)
547 return 0;
549 tree_code rhs_code = gimple_assign_rhs_code (assign);
550 if (rhs_code != code && rhs_code != widened_code)
551 return 0;
553 tree type = gimple_expr_type (assign);
554 if (!INTEGRAL_TYPE_P (type))
555 return 0;
557 /* Assume that both operands will be leaf operands. */
558 max_nops -= 2;
560 /* Check the operands. */
561 unsigned int next_op = 0;
562 for (unsigned int i = 0; i < 2; ++i)
564 vect_unpromoted_value *this_unprom = &unprom[next_op];
565 unsigned int nops = 1;
566 tree op = gimple_op (assign, i + 1);
567 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
569 /* We already have a common type from earlier operands.
570 Update it to account for OP. */
571 this_unprom->set_op (op, vect_constant_def);
572 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
573 return 0;
575 else
577 /* Only allow shifts by constants. */
578 if (shift_p && i == 1)
579 return 0;
581 if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
582 return 0;
584 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
586 /* The operand isn't widened. If STMT_INFO has the code
587 for an unwidened operation, recursively check whether
588 this operand is a node of the tree. */
589 if (rhs_code != code
590 || max_nops == 0
591 || this_unprom->dt != vect_internal_def)
592 return 0;
594 /* Give back the leaf slot allocated above now that we're
595 not treating this as a leaf operand. */
596 max_nops += 1;
598 /* Recursively process the definition of the operand. */
599 stmt_vec_info def_stmt_info
600 = vinfo->lookup_def (this_unprom->op);
601 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
602 widened_code, shift_p, max_nops,
603 this_unprom, common_type);
604 if (nops == 0)
605 return 0;
607 max_nops -= nops;
609 else
611 /* Make sure that the operand is narrower than the result. */
612 if (TYPE_PRECISION (this_unprom->type) * 2
613 > TYPE_PRECISION (type))
614 return 0;
616 /* Update COMMON_TYPE for the new operand. */
617 if (i == 0)
618 *common_type = this_unprom->type;
619 else if (!vect_joust_widened_type (type, this_unprom->type,
620 common_type))
621 return 0;
624 next_op += nops;
626 return next_op;
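/* For example (see vect_recog_dot_prod_pattern below): given

     prod = (int) x_t * (int) y_t

   with x_t and y_t of type short, calling this function with
   CODE = MULT_EXPR, WIDENED_CODE = WIDEN_MULT_EXPR and MAX_NOPS = 2
   returns 2, describes x_t and y_t in UNPROM[0] and UNPROM[1], and sets
   *COMMON_TYPE to short.  */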
629 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
630 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
632 static tree
633 vect_recog_temp_ssa_var (tree type, gimple *stmt)
635 return make_temp_ssa_name (type, stmt, "patt");
638 /* STMT2_INFO describes a type conversion that could be split into STMT1
639 followed by a version of STMT2_INFO that takes NEW_RHS as its first
640 input. Try to do this using pattern statements, returning true on
641 success. */
643 static bool
644 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
645 gimple *stmt1, tree vectype)
647 if (is_pattern_stmt_p (stmt2_info))
649 /* STMT2_INFO is part of a pattern. Get the statement to which
650 the pattern is attached. */
651 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
652 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
654 if (dump_enabled_p ())
655 dump_printf_loc (MSG_NOTE, vect_location,
656 "Splitting pattern statement: %G", stmt2_info->stmt);
658 /* Since STMT2_INFO is a pattern statement, we can change it
659 in-situ without worrying about changing the code for the
660 containing block. */
661 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
663 if (dump_enabled_p ())
665 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
666 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
667 stmt2_info->stmt);
670 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
671 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
672 /* STMT2_INFO is the actual pattern statement. Add STMT1
673 to the end of the definition sequence. */
674 gimple_seq_add_stmt_without_update (def_seq, stmt1);
675 else
677 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
678 before it. */
679 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
680 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
682 return true;
684 else
686 /* STMT2_INFO doesn't yet have a pattern. Try to create a
687 two-statement pattern now. */
688 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
689 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
690 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
691 if (!lhs_vectype)
692 return false;
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_NOTE, vect_location,
696 "Splitting statement: %G", stmt2_info->stmt);
698 /* Add STMT1 as a singleton pattern definition sequence. */
699 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
700 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
701 gimple_seq_add_stmt_without_update (def_seq, stmt1);
703 /* Build the second of the two pattern statements. */
704 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
705 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
706 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
708 if (dump_enabled_p ())
710 dump_printf_loc (MSG_NOTE, vect_location,
711 "into pattern statements: %G", stmt1);
712 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
715 return true;
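/* For example (hypothetical types): if STMT2_INFO is

     int a = (int) c;		// c has type char

   and the caller wants a short intermediate, STMT1 would be

     short tmp = (short) c;

   and the version of STMT2_INFO taking NEW_RHS (here tmp) would be

     int a = (int) tmp;  */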
719 /* Convert UNPROM to TYPE and return the result, adding new statements
720 to STMT_INFO's pattern definition statements if no better way is
721 available. VECTYPE is the vector form of TYPE. */
723 static tree
724 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
725 vect_unpromoted_value *unprom, tree vectype)
727 /* Check for a no-op conversion. */
728 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
729 return unprom->op;
731 /* Allow the caller to create constant vect_unpromoted_values. */
732 if (TREE_CODE (unprom->op) == INTEGER_CST)
733 return wide_int_to_tree (type, wi::to_widest (unprom->op));
735 tree input = unprom->op;
736 if (unprom->caster)
738 tree lhs = gimple_get_lhs (unprom->caster->stmt);
739 tree lhs_type = TREE_TYPE (lhs);
741 /* If the result of the existing cast is the right width, use it
742 instead of the source of the cast. */
743 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
744 input = lhs;
745 /* If the precision we want is between the source and result
746 precisions of the existing cast, try splitting the cast into
747 two and tapping into a mid-way point. */
748 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
749 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
751 /* In order to preserve the semantics of the original cast,
752 give the mid-way point the same signedness as the input value.
754 It would be possible to use a signed type here instead if
755 TYPE is signed and UNPROM->TYPE is unsigned, but that would
756 make the sign of the midtype sensitive to the order in
757 which we process the statements, since the signedness of
758 TYPE is the signedness required by just one of possibly
759 many users. Also, unsigned promotions are usually as cheap
760 as or cheaper than signed ones, so it's better to keep an
761 unsigned promotion. */
762 tree midtype = build_nonstandard_integer_type
763 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
764 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
765 if (vec_midtype)
767 input = vect_recog_temp_ssa_var (midtype, NULL);
768 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
769 unprom->op);
770 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
771 vec_midtype))
772 append_pattern_def_seq (vinfo, stmt_info,
773 new_stmt, vec_midtype);
777 /* See if we can reuse an existing result. */
778 if (types_compatible_p (type, TREE_TYPE (input)))
779 return input;
782 /* We need a new conversion statement. */
783 tree new_op = vect_recog_temp_ssa_var (type, NULL);
784 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
786 /* If OP is an external value, see if we can insert the new statement
787 on an incoming edge. */
788 if (input == unprom->op && unprom->dt == vect_external_def)
789 if (edge e = vect_get_external_def_edge (vinfo, input))
791 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
792 gcc_assert (!new_bb);
793 return new_op;
796 /* As a (common) last resort, add the statement to the pattern itself. */
797 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
798 return new_op;
801 /* Invoke vect_convert_input for N elements of UNPROM and store the
802 result in the corresponding elements of RESULT. */
804 static void
805 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
806 tree *result, tree type, vect_unpromoted_value *unprom,
807 tree vectype)
809 for (unsigned int i = 0; i < n; ++i)
811 unsigned int j;
812 for (j = 0; j < i; ++j)
813 if (unprom[j].op == unprom[i].op)
814 break;
815 if (j < i)
816 result[i] = result[j];
817 else
818 result[i] = vect_convert_input (vinfo, stmt_info,
819 type, &unprom[i], vectype);
823 /* The caller has created a (possibly empty) sequence of pattern definition
824 statements followed by a single statement PATTERN_STMT. Cast the result
825 of this final statement to TYPE. If a new statement is needed, add
826 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
827 and return the new statement, otherwise return PATTERN_STMT as-is.
828 VECITYPE is the vector form of PATTERN_STMT's result type. */
830 static gimple *
831 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
832 gimple *pattern_stmt, tree vecitype)
834 tree lhs = gimple_get_lhs (pattern_stmt);
835 if (!types_compatible_p (type, TREE_TYPE (lhs)))
837 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
838 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
839 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
841 return pattern_stmt;
844 /* Return true if STMT_INFO describes a reduction for which reassociation
845 is allowed. If STMT_INFO is part of a group, assume that it's part of
846 a reduction chain and optimistically assume that all statements
847 except the last allow reassociation.
848 Also require it to have code CODE and to be a reduction
849 in the outermost loop. When returning true, store the operands in
850 *OP0_OUT and *OP1_OUT. */
852 static bool
853 vect_reassociating_reduction_p (vec_info *vinfo,
854 stmt_vec_info stmt_info, tree_code code,
855 tree *op0_out, tree *op1_out)
857 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
858 if (!loop_info)
859 return false;
861 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
862 if (!assign || gimple_assign_rhs_code (assign) != code)
863 return false;
865 /* We don't allow changing the order of the computation in the inner-loop
866 when doing outer-loop vectorization. */
867 class loop *loop = LOOP_VINFO_LOOP (loop_info);
868 if (loop && nested_in_vect_loop_p (loop, stmt_info))
869 return false;
871 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
873 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
874 code))
875 return false;
877 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
878 return false;
880 *op0_out = gimple_assign_rhs1 (assign);
881 *op1_out = gimple_assign_rhs2 (assign);
882 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
883 std::swap (*op0_out, *op1_out);
884 return true;
887 /* Function vect_recog_dot_prod_pattern
889 Try to find the following pattern:
891 type x_t, y_t;
892 TYPE1 prod;
893 TYPE2 sum = init;
894 loop:
895 sum_0 = phi <init, sum_1>
896 S1 x_t = ...
897 S2 y_t = ...
898 S3 x_T = (TYPE1) x_t;
899 S4 y_T = (TYPE1) y_t;
900 S5 prod = x_T * y_T;
901 [S6 prod = (TYPE2) prod; #optional]
902 S7 sum_1 = prod + sum_0;
904 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
905 same size as 'TYPE1' or bigger. This is a special case of a reduction
906 computation.
908 Input:
910 * STMT_VINFO: The stmt from which the pattern search begins. In the
911 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
912 will be detected.
914 Output:
916 * TYPE_OUT: The type of the output of this pattern.
918 * Return value: A new stmt that will be used to replace the sequence of
919 stmts that constitute the pattern. In this case it will be:
920 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
922 Note: The dot-prod idiom is a widening reduction pattern that is
923 vectorized without preserving all the intermediate results. It
924 produces only N/2 (widened) results (by summing up pairs of
925 intermediate results) rather than all N results. Therefore, we
926 cannot allow this pattern when we want to get all the results and in
927 the correct order (as is the case when this computation is in an
928 inner-loop nested in an outer-loop that is being vectorized). */
930 static gimple *
931 vect_recog_dot_prod_pattern (vec_info *vinfo,
932 stmt_vec_info stmt_vinfo, tree *type_out)
934 tree oprnd0, oprnd1;
935 gimple *last_stmt = stmt_vinfo->stmt;
936 tree type, half_type;
937 gimple *pattern_stmt;
938 tree var;
940 /* Look for the following pattern
941 DX = (TYPE1) X;
942 DY = (TYPE1) Y;
943 DPROD = DX * DY;
944 DDPROD = (TYPE2) DPROD;
945 sum_1 = DDPROD + sum_0;
946 In which
947 - DX is double the size of X
948 - DY is double the size of Y
949 - DX, DY, DPROD all have the same type
950 - sum is the same size as DPROD or bigger
951 - sum has been recognized as a reduction variable.
953 This is equivalent to:
954 DPROD = X w* Y; #widen mult
955 sum_1 = DPROD w+ sum_0; #widen summation
957 DPROD = X w* Y; #widen mult
958 sum_1 = DPROD + sum_0; #summation
961 /* Starting from LAST_STMT, follow the defs of its uses in search
962 of the above pattern. */
964 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
965 &oprnd0, &oprnd1))
966 return NULL;
968 type = gimple_expr_type (last_stmt);
970 vect_unpromoted_value unprom_mult;
971 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
973 /* So far so good. Since last_stmt was detected as a (summation) reduction,
974 we know that oprnd1 is the reduction variable (defined by a loop-header
975 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
976 Left to check that oprnd0 is defined by a (widen_)mult_expr */
977 if (!oprnd0)
978 return NULL;
980 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
981 if (!mult_vinfo)
982 return NULL;
984 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
985 inside the loop (in case we are analyzing an outer-loop). */
986 vect_unpromoted_value unprom0[2];
987 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
988 false, 2, unprom0, &half_type))
989 return NULL;
991 /* If there are two widening operations, make sure they agree on
992 the sign of the extension. */
993 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
994 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
995 return NULL;
997 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
999 tree half_vectype;
1000 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1001 type_out, &half_vectype))
1002 return NULL;
1004 /* Get the inputs in the appropriate types. */
1005 tree mult_oprnd[2];
1006 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1007 unprom0, half_vectype);
1009 var = vect_recog_temp_ssa_var (type, NULL);
1010 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1011 mult_oprnd[0], mult_oprnd[1], oprnd1);
1013 return pattern_stmt;
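/* An illustrative scalar source loop (hypothetical example, not taken from
   any testcase) of the form matched by vect_recog_dot_prod_pattern above:
   the two loads are widened from short to int, multiplied and accumulated,
   which maps onto DOT_PROD_EXPR.  */

static int
dot_prod_example (const short *x, const short *y, int n)
{
  int sum = 0;                          /* TYPE2 sum = init;  */
  for (int i = 0; i < n; i++)
    sum += (int) x[i] * (int) y[i];     /* S3-S7 in the comment above.  */
  return sum;
}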
1017 /* Function vect_recog_sad_pattern
1019 Try to find the following Sum of Absolute Difference (SAD) pattern:
1021 type x_t, y_t;
1022 signed TYPE1 diff, abs_diff;
1023 TYPE2 sum = init;
1024 loop:
1025 sum_0 = phi <init, sum_1>
1026 S1 x_t = ...
1027 S2 y_t = ...
1028 S3 x_T = (TYPE1) x_t;
1029 S4 y_T = (TYPE1) y_t;
1030 S5 diff = x_T - y_T;
1031 S6 abs_diff = ABS_EXPR <diff>;
1032 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1033 S8 sum_1 = abs_diff + sum_0;
1035 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1036 same size as 'TYPE1' or bigger. This is a special case of a reduction
1037 computation.
1039 Input:
1041 * STMT_VINFO: The stmt from which the pattern search begins. In the
1042 example, when this function is called with S8, the pattern
1043 {S3,S4,S5,S6,S7,S8} will be detected.
1045 Output:
1047 * TYPE_OUT: The type of the output of this pattern.
1049 * Return value: A new stmt that will be used to replace the sequence of
1050 stmts that constitute the pattern. In this case it will be:
1051 SAD_EXPR <x_t, y_t, sum_0>
1054 static gimple *
1055 vect_recog_sad_pattern (vec_info *vinfo,
1056 stmt_vec_info stmt_vinfo, tree *type_out)
1058 gimple *last_stmt = stmt_vinfo->stmt;
1059 tree half_type;
1061 /* Look for the following pattern
1062 DX = (TYPE1) X;
1063 DY = (TYPE1) Y;
1064 DDIFF = DX - DY;
1065 DAD = ABS_EXPR <DDIFF>;
1066 DAD = (TYPE2) DAD; #optional
1067 sum_1 = DAD + sum_0;
1068 In which
1069 - DX is at least double the size of X
1070 - DY is at least double the size of Y
1071 - DX, DY, DDIFF, DAD all have the same type
1072 - sum is the same size as DAD or bigger
1073 - sum has been recognized as a reduction variable.
1075 This is equivalent to:
1076 DDIFF = X w- Y; #widen sub
1077 DAD = ABS_EXPR <DDIFF>;
1078 sum_1 = DAD w+ sum_0; #widen summation
1080 DDIFF = X w- Y; #widen sub
1081 DAD = ABS_EXPR <DDIFF>;
1082 sum_1 = DAD + sum_0; #summation
1085 /* Starting from LAST_STMT, follow the defs of its uses in search
1086 of the above pattern. */
1088 tree plus_oprnd0, plus_oprnd1;
1089 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1090 &plus_oprnd0, &plus_oprnd1))
1091 return NULL;
1093 tree sum_type = gimple_expr_type (last_stmt);
1095 /* Any non-truncating sequence of conversions is OK here, since
1096 with a successful match, the result of the ABS(U) is known to fit
1097 within the nonnegative range of the result type. (It cannot be the
1098 negative of the minimum signed value due to the range of the widening
1099 MINUS_EXPR.) */
1100 vect_unpromoted_value unprom_abs;
1101 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1102 &unprom_abs);
1104 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1105 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1106 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1107 Then check that plus_oprnd0 is defined by an abs_expr. */
1109 if (!plus_oprnd0)
1110 return NULL;
1112 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1113 if (!abs_stmt_vinfo)
1114 return NULL;
1116 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1117 inside the loop (in case we are analyzing an outer-loop). */
1118 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1119 if (!abs_stmt
1120 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1121 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1122 return NULL;
1124 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1125 tree abs_type = TREE_TYPE (abs_oprnd);
1126 if (TYPE_UNSIGNED (abs_type))
1127 return NULL;
1129 /* Peel off conversions from the ABS input. This can involve sign
1130 changes (e.g. from an unsigned subtraction to a signed ABS input)
1131 or signed promotion, but it can't include unsigned promotion.
1132 (Note that ABS of an unsigned promotion should have been folded
1133 away before now anyway.) */
1134 vect_unpromoted_value unprom_diff;
1135 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1136 &unprom_diff);
1137 if (!abs_oprnd)
1138 return NULL;
1139 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1140 && TYPE_UNSIGNED (unprom_diff.type))
1141 return NULL;
1143 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1144 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1145 if (!diff_stmt_vinfo)
1146 return NULL;
1148 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1149 inside the loop (in case we are analyzing an outer-loop). */
1150 vect_unpromoted_value unprom[2];
1151 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1152 false, 2, unprom, &half_type))
1153 return NULL;
1155 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1157 tree half_vectype;
1158 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1159 type_out, &half_vectype))
1160 return NULL;
1162 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1163 tree sad_oprnd[2];
1164 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1165 unprom, half_vectype);
1167 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1168 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1169 sad_oprnd[1], plus_oprnd1);
1171 return pattern_stmt;
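/* An illustrative scalar source loop (hypothetical example) of the form
   matched by vect_recog_sad_pattern above: absolute differences of widened
   unsigned char inputs are accumulated into an int, which maps onto
   SAD_EXPR.  */

static int
sad_example (const unsigned char *x, const unsigned char *y, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    {
      int diff = (int) x[i] - (int) y[i];   /* S5: widening subtraction.  */
      sum += diff < 0 ? -diff : diff;       /* S6 and S8: ABS, accumulate.  */
    }
  return sum;
}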
1174 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1175 so that it can be treated as though it had the form:
1177 A_TYPE a;
1178 B_TYPE b;
1179 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1180 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1181 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1182 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1183 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1185 Try to replace the pattern with:
1187 A_TYPE a;
1188 B_TYPE b;
1189 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1190 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1191 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1192 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1194 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1196 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1197 name of the pattern being matched, for dump purposes. */
1199 static gimple *
1200 vect_recog_widen_op_pattern (vec_info *vinfo,
1201 stmt_vec_info last_stmt_info, tree *type_out,
1202 tree_code orig_code, tree_code wide_code,
1203 bool shift_p, const char *name)
1205 gimple *last_stmt = last_stmt_info->stmt;
1207 vect_unpromoted_value unprom[2];
1208 tree half_type;
1209 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1210 shift_p, 2, unprom, &half_type))
1211 return NULL;
1213 /* Pattern detected. */
1214 vect_pattern_detected (name, last_stmt);
1216 tree type = gimple_expr_type (last_stmt);
1217 tree itype = type;
1218 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1219 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1220 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1221 TYPE_UNSIGNED (half_type));
1223 /* Check target support */
1224 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1225 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1226 enum tree_code dummy_code;
1227 int dummy_int;
1228 auto_vec<tree> dummy_vec;
1229 if (!vectype
1230 || !vecitype
1231 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1232 vecitype, vectype,
1233 &dummy_code, &dummy_code,
1234 &dummy_int, &dummy_vec))
1235 return NULL;
1237 *type_out = get_vectype_for_scalar_type (vinfo, type);
1238 if (!*type_out)
1239 return NULL;
1241 tree oprnd[2];
1242 vect_convert_inputs (vinfo, last_stmt_info,
1243 2, oprnd, half_type, unprom, vectype);
1245 tree var = vect_recog_temp_ssa_var (itype, NULL);
1246 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1247 oprnd[0], oprnd[1]);
1249 return vect_convert_output (vinfo, last_stmt_info,
1250 type, pattern_stmt, vecitype);
1253 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1254 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1256 static gimple *
1257 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1258 tree *type_out)
1260 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1261 MULT_EXPR, WIDEN_MULT_EXPR, false,
1262 "vect_recog_widen_mult_pattern");
1265 /* Function vect_recog_pow_pattern
1267 Try to find the following pattern:
1269 x = POW (y, N);
1271 with POW being one of pow, powf, powi, powif and N being
1272 either 2 or 0.5.
1274 Input:
1276 * STMT_VINFO: The stmt from which the pattern search begins.
1278 Output:
1280 * TYPE_OUT: The type of the output of this pattern.
1282 * Return value: A new stmt that will be used to replace the sequence of
1283 stmts that constitute the pattern. In this case it will be:
1284 x = x * x
1286 x = sqrt (x)
1289 static gimple *
1290 vect_recog_pow_pattern (vec_info *vinfo,
1291 stmt_vec_info stmt_vinfo, tree *type_out)
1293 gimple *last_stmt = stmt_vinfo->stmt;
1294 tree base, exp;
1295 gimple *stmt;
1296 tree var;
1298 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1299 return NULL;
1301 switch (gimple_call_combined_fn (last_stmt))
1303 CASE_CFN_POW:
1304 CASE_CFN_POWI:
1305 break;
1307 default:
1308 return NULL;
1311 base = gimple_call_arg (last_stmt, 0);
1312 exp = gimple_call_arg (last_stmt, 1);
1313 if (TREE_CODE (exp) != REAL_CST
1314 && TREE_CODE (exp) != INTEGER_CST)
1316 if (flag_unsafe_math_optimizations
1317 && TREE_CODE (base) == REAL_CST
1318 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1320 combined_fn log_cfn;
1321 built_in_function exp_bfn;
1322 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1324 case BUILT_IN_POW:
1325 log_cfn = CFN_BUILT_IN_LOG;
1326 exp_bfn = BUILT_IN_EXP;
1327 break;
1328 case BUILT_IN_POWF:
1329 log_cfn = CFN_BUILT_IN_LOGF;
1330 exp_bfn = BUILT_IN_EXPF;
1331 break;
1332 case BUILT_IN_POWL:
1333 log_cfn = CFN_BUILT_IN_LOGL;
1334 exp_bfn = BUILT_IN_EXPL;
1335 break;
1336 default:
1337 return NULL;
1339 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1340 tree exp_decl = builtin_decl_implicit (exp_bfn);
1341 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1342 does that, but if C is a power of 2, we want to use
1343 exp2 (log2 (C) * x) in the non-vectorized version, but for
1344 vectorization we don't have vectorized exp2. */
1345 if (logc
1346 && TREE_CODE (logc) == REAL_CST
1347 && exp_decl
1348 && lookup_attribute ("omp declare simd",
1349 DECL_ATTRIBUTES (exp_decl)))
1351 cgraph_node *node = cgraph_node::get_create (exp_decl);
1352 if (node->simd_clones == NULL)
1354 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1355 || node->definition)
1356 return NULL;
1357 expand_simd_clones (node);
1358 if (node->simd_clones == NULL)
1359 return NULL;
1361 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1362 if (!*type_out)
1363 return NULL;
1364 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1365 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1366 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1367 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1368 g = gimple_build_call (exp_decl, 1, def);
1369 gimple_call_set_lhs (g, res);
1370 return g;
1374 return NULL;
1377 /* We now have a pow or powi builtin function call with a constant
1378 exponent. */
1380 /* Catch squaring. */
1381 if ((tree_fits_shwi_p (exp)
1382 && tree_to_shwi (exp) == 2)
1383 || (TREE_CODE (exp) == REAL_CST
1384 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1386 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1387 TREE_TYPE (base), type_out))
1388 return NULL;
1390 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1391 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1392 return stmt;
1395 /* Catch square root. */
1396 if (TREE_CODE (exp) == REAL_CST
1397 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1399 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1400 if (*type_out
1401 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1402 OPTIMIZE_FOR_SPEED))
1404 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1405 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1406 gimple_call_set_lhs (stmt, var);
1407 gimple_call_set_nothrow (stmt, true);
1408 return stmt;
1412 return NULL;
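/* Illustrative uses (hypothetical example) of the calls this pattern
   handles: pow (x, 2.0) is rewritten as x * x and pow (y, 0.5) as a square
   root, provided the target supports the vector forms.  */

static double
pow_example (double x, double y)
{
  return __builtin_pow (x, 2.0) + __builtin_pow (y, 0.5);
}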
1416 /* Function vect_recog_widen_sum_pattern
1418 Try to find the following pattern:
1420 type x_t;
1421 TYPE x_T, sum = init;
1422 loop:
1423 sum_0 = phi <init, sum_1>
1424 S1 x_t = *p;
1425 S2 x_T = (TYPE) x_t;
1426 S3 sum_1 = x_T + sum_0;
1428 where type 'TYPE' is at least double the size of type 'type', i.e - we're
1429 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1430 a special case of a reduction computation.
1432 Input:
1434 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1435 when this function is called with S3, the pattern {S2,S3} will be detected.
1437 Output:
1439 * TYPE_OUT: The type of the output of this pattern.
1441 * Return value: A new stmt that will be used to replace the sequence of
1442 stmts that constitute the pattern. In this case it will be:
1443 WIDEN_SUM <x_t, sum_0>
1445 Note: The widening-sum idiom is a widening reduction pattern that is
1446 vectorized without preserving all the intermediate results. It
1447 produces only N/2 (widened) results (by summing up pairs of
1448 intermediate results) rather than all N results. Therefore, we
1449 cannot allow this pattern when we want to get all the results and in
1450 the correct order (as is the case when this computation is in an
1451 inner-loop nested in an outer-loop that is being vectorized). */
1453 static gimple *
1454 vect_recog_widen_sum_pattern (vec_info *vinfo,
1455 stmt_vec_info stmt_vinfo, tree *type_out)
1457 gimple *last_stmt = stmt_vinfo->stmt;
1458 tree oprnd0, oprnd1;
1459 tree type;
1460 gimple *pattern_stmt;
1461 tree var;
1463 /* Look for the following pattern
1464 DX = (TYPE) X;
1465 sum_1 = DX + sum_0;
1466 In which DX is at least double the size of X, and sum_1 has been
1467 recognized as a reduction variable.
1470 /* Starting from LAST_STMT, follow the defs of its uses in search
1471 of the above pattern. */
1473 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1474 &oprnd0, &oprnd1))
1475 return NULL;
1477 type = gimple_expr_type (last_stmt);
1479 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1480 we know that oprnd1 is the reduction variable (defined by a loop-header
1481 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1482 Left to check that oprnd0 is defined by a cast from type 'type' to type
1483 'TYPE'. */
1485 vect_unpromoted_value unprom0;
1486 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1487 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1488 return NULL;
1490 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1492 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1493 unprom0.type, type_out))
1494 return NULL;
1496 var = vect_recog_temp_ssa_var (type, NULL);
1497 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1499 return pattern_stmt;
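/* An illustrative scalar loop (hypothetical example) of the form matched by
   vect_recog_widen_sum_pattern above: unsigned char elements are widened
   and summed into an int accumulator, which maps onto WIDEN_SUM_EXPR.  */

static int
widen_sum_example (const unsigned char *x, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    sum += x[i];        /* S2 + S3: widen, then accumulate.  */
  return sum;
}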
1502 /* Recognize cases in which an operation is performed in one type WTYPE
1503 but could be done more efficiently in a narrower type NTYPE. For example,
1504 if we have:
1506 ATYPE a; // narrower than NTYPE
1507 BTYPE b; // narrower than NTYPE
1508 WTYPE aw = (WTYPE) a;
1509 WTYPE bw = (WTYPE) b;
1510 WTYPE res = aw + bw; // only uses of aw and bw
1512 then it would be more efficient to do:
1514 NTYPE an = (NTYPE) a;
1515 NTYPE bn = (NTYPE) b;
1516 NTYPE resn = an + bn;
1517 WTYPE res = (WTYPE) resn;
1519 Other situations include things like:
1521 ATYPE a; // NTYPE or narrower
1522 WTYPE aw = (WTYPE) a;
1523 WTYPE res = aw + b;
1525 when only "(NTYPE) res" is significant. In that case it's more efficient
1526 to truncate "b" and do the operation on NTYPE instead:
1528 NTYPE an = (NTYPE) a;
1529 NTYPE bn = (NTYPE) b; // truncation
1530 NTYPE resn = an + bn;
1531 WTYPE res = (WTYPE) resn;
1533 All users of "res" should then use "resn" instead, making the final
1534 statement dead (not marked as relevant). The final statement is still
1535 needed to maintain the type correctness of the IR.
1537 vect_determine_precisions has already determined the minimum
1538 precision of the operation and the minimum precision required
1539 by users of the result. */
1541 static gimple *
1542 vect_recog_over_widening_pattern (vec_info *vinfo,
1543 stmt_vec_info last_stmt_info, tree *type_out)
1545 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1546 if (!last_stmt)
1547 return NULL;
1549 /* See whether we have found that this operation can be done on a
1550 narrower type without changing its semantics. */
1551 unsigned int new_precision = last_stmt_info->operation_precision;
1552 if (!new_precision)
1553 return NULL;
1555 tree lhs = gimple_assign_lhs (last_stmt);
1556 tree type = TREE_TYPE (lhs);
1557 tree_code code = gimple_assign_rhs_code (last_stmt);
1559 /* Keep the first operand of a COND_EXPR as-is: only the other two
1560 operands are interesting. */
1561 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1563 /* Check the operands. */
1564 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1565 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1566 unprom.quick_grow (nops);
1567 unsigned int min_precision = 0;
1568 bool single_use_p = false;
1569 for (unsigned int i = 0; i < nops; ++i)
1571 tree op = gimple_op (last_stmt, first_op + i);
1572 if (TREE_CODE (op) == INTEGER_CST)
1573 unprom[i].set_op (op, vect_constant_def);
1574 else if (TREE_CODE (op) == SSA_NAME)
1576 bool op_single_use_p = true;
1577 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1578 &op_single_use_p))
1579 return NULL;
1580 /* If:
1582 (1) N bits of the result are needed;
1583 (2) all inputs are widened from M<N bits; and
1584 (3) one operand OP is a single-use SSA name
1586 we can shift the M->N widening from OP to the output
1587 without changing the number or type of extensions involved.
1588 This then reduces the number of copies of STMT_INFO.
1590 If instead of (3) more than one operand is a single-use SSA name,
1591 shifting the extension to the output is even more of a win.
1593 If instead:
1595 (1) N bits of the result are needed;
1596 (2) one operand OP2 is widened from M2<N bits;
1597 (3) another operand OP1 is widened from M1<M2 bits; and
1598 (4) both OP1 and OP2 are single-use
1600 the choice is between:
1602 (a) truncating OP2 to M1, doing the operation on M1,
1603 and then widening the result to N
1605 (b) widening OP1 to M2, doing the operation on M2, and then
1606 widening the result to N
1608 Both shift the M2->N widening of the inputs to the output.
1609 (a) additionally shifts the M1->M2 widening to the output;
1610 it requires fewer copies of STMT_INFO but requires an extra
1611 M2->M1 truncation.
1613 Which is better will depend on the complexity and cost of
1614 STMT_INFO, which is hard to predict at this stage. However,
1615 a clear tie-breaker in favor of (b) is the fact that the
1616 truncation in (a) increases the length of the operation chain.
1618 If instead of (4) only one of OP1 or OP2 is single-use,
1619 (b) is still a win over doing the operation in N bits:
1620 it still shifts the M2->N widening on the single-use operand
1621 to the output and reduces the number of STMT_INFO copies.
1623 If neither operand is single-use then operating on fewer than
1624 N bits might lead to more extensions overall. Whether it does
1625 or not depends on global information about the vectorization
1626 region, and whether that's a good trade-off would again
1627 depend on the complexity and cost of the statements involved,
1628 as well as things like register pressure that are not normally
1629 modelled at this stage. We therefore ignore these cases
1630 and just optimize the clear single-use wins above.
1632 Thus we take the maximum precision of the unpromoted operands
1633 and record whether any operand is single-use. */
1634 if (unprom[i].dt == vect_internal_def)
1636 min_precision = MAX (min_precision,
1637 TYPE_PRECISION (unprom[i].type));
1638 single_use_p |= op_single_use_p;
1641 else
1642 return NULL;
1645 /* Although the operation could be done in operation_precision, we have
1646 to balance that against introducing extra truncations or extensions.
1647 Calculate the minimum precision that can be handled efficiently.
1649 The loop above determined that the operation could be handled
1650 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1651 extension from the inputs to the output without introducing more
1652 instructions, and would reduce the number of instructions required
1653 for STMT_INFO itself.
1655 vect_determine_precisions has also determined that the result only
1656 needs min_output_precision bits. Truncating by a factor of N times
1657 requires a tree of N - 1 instructions, so if TYPE is N times wider
1658 than min_output_precision, doing the operation in TYPE and truncating
1659 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1660 In contrast:
1662 - truncating the input to a unary operation and doing the operation
1663 in the new type requires at most N - 1 + 1 = N instructions per
1664 output vector
1666 - doing the same for a binary operation requires at most
1667 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1669 Both unary and binary operations require fewer instructions than
1670 this if the operands were extended from a suitable truncated form.
1671 Thus there is usually nothing to lose by doing operations in
1672 min_output_precision bits, but there can be something to gain. */
1673 if (!single_use_p)
1674 min_precision = last_stmt_info->min_output_precision;
1675 else
1676 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1678 /* Apply the minimum efficient precision we just calculated. */
1679 if (new_precision < min_precision)
1680 new_precision = min_precision;
1681 if (new_precision >= TYPE_PRECISION (type))
1682 return NULL;
1684 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1686 *type_out = get_vectype_for_scalar_type (vinfo, type);
1687 if (!*type_out)
1688 return NULL;
1690 /* We've found a viable pattern. Get the new type of the operation. */
1691 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1692 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1694 /* If we're truncating an operation, we need to make sure that we
1695 don't introduce new undefined overflow. The codes tested here are
1696 a subset of those accepted by vect_truncatable_operation_p. */
1697 tree op_type = new_type;
1698 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1699 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1700 op_type = build_nonstandard_integer_type (new_precision, true);
1702 /* We specifically don't check here whether the target supports the
1703 new operation, since it might be something that a later pattern
1704 wants to rewrite anyway. If targets have a minimum element size
1705 for some optabs, we should pattern-match smaller ops to larger ops
1706 where beneficial. */
1707 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1708 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1709 if (!new_vectype || !op_vectype)
1710 return NULL;
1712 if (dump_enabled_p ())
1713 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1714 type, new_type);
1716 /* Calculate the rhs operands for an operation on OP_TYPE. */
1717 tree ops[3] = {};
1718 for (unsigned int i = 1; i < first_op; ++i)
1719 ops[i - 1] = gimple_op (last_stmt, i);
1720 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
1721 op_type, &unprom[0], op_vectype);
1723 /* Use the operation to produce a result of type OP_TYPE. */
1724 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1725 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1726 ops[0], ops[1], ops[2]);
1727 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1729 if (dump_enabled_p ())
1730 dump_printf_loc (MSG_NOTE, vect_location,
1731 "created pattern stmt: %G", pattern_stmt);
1733 /* Convert back to the original signedness, if OP_TYPE is different
1734 from NEW_TYPE. */
1735 if (op_type != new_type)
1736 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
1737 pattern_stmt, op_vectype);
1739 /* Promote the result to the original type. */
1740 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
1741 pattern_stmt, new_vectype);
1743 return pattern_stmt;
1746 /* Recognize the following patterns:
1748 ATYPE a; // narrower than TYPE
1749 BTYPE b; // narrower than TYPE
1751 1) Multiply high with scaling
1752 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1753 2) ... or also with rounding
1754 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1756 where only the bottom half of res is used. */
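/* For illustration, a scalar loop with the shape of pattern (1), where
   ATYPE = BTYPE = short and TYPE = int (a sketch; the function and
   variable names are made up):

     void
     f (short *restrict r, const short *restrict a,
        const short *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         r[i] = ((int) a[i] * (int) b[i]) >> 15;
     }

   Only the low 16 bits of each shift result are stored, so the
   multiplication and shift can become a single IFN_MULHS on the
   narrow type.  */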
1758 static gimple *
1759 vect_recog_mulhs_pattern (vec_info *vinfo,
1760 stmt_vec_info last_stmt_info, tree *type_out)
1762 /* Check for a right shift. */
1763 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1764 if (!last_stmt
1765 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1766 return NULL;
1768 /* Check that the shift result is wider than the users of the
1769 result need (i.e. that narrowing would be a natural choice). */
1770 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1771 unsigned int target_precision
1772 = vect_element_precision (last_stmt_info->min_output_precision);
1773 if (!INTEGRAL_TYPE_P (lhs_type)
1774 || target_precision >= TYPE_PRECISION (lhs_type))
1775 return NULL;
1777 /* Look through any change in sign on the outer shift input. */
1778 vect_unpromoted_value unprom_rshift_input;
1779 tree rshift_input = vect_look_through_possible_promotion
1780 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1781 if (!rshift_input
1782 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1783 != TYPE_PRECISION (lhs_type))
1784 return NULL;
1786 /* Get the definition of the shift input. */
1787 stmt_vec_info rshift_input_stmt_info
1788 = vect_get_internal_def (vinfo, rshift_input);
1789 if (!rshift_input_stmt_info)
1790 return NULL;
1791 gassign *rshift_input_stmt
1792 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1793 if (!rshift_input_stmt)
1794 return NULL;
1796 stmt_vec_info mulh_stmt_info;
1797 tree scale_term;
1798 internal_fn ifn;
1799 unsigned int expect_offset;
1801 /* Check for the presence of the rounding term. */
1802 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1804 /* Check that the outer shift was by 1. */
1805 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1806 return NULL;
1808 /* Check that the second operand of the PLUS_EXPR is 1. */
1809 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1810 return NULL;
1812 /* Look through any change in sign on the addition input. */
1813 vect_unpromoted_value unprom_plus_input;
1814 tree plus_input = vect_look_through_possible_promotion
1815 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1816 if (!plus_input
1817 || TYPE_PRECISION (TREE_TYPE (plus_input))
1818 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
1819 return NULL;
1821 /* Get the definition of the multiply-high-scale part. */
1822 stmt_vec_info plus_input_stmt_info
1823 = vect_get_internal_def (vinfo, plus_input);
1824 if (!plus_input_stmt_info)
1825 return NULL;
1826 gassign *plus_input_stmt
1827 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1828 if (!plus_input_stmt
1829 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1830 return NULL;
1832 /* Look through any change in sign on the scaling input. */
1833 vect_unpromoted_value unprom_scale_input;
1834 tree scale_input = vect_look_through_possible_promotion
1835 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1836 if (!scale_input
1837 || TYPE_PRECISION (TREE_TYPE (scale_input))
1838 != TYPE_PRECISION (TREE_TYPE (plus_input)))
1839 return NULL;
1841 /* Get the definition of the multiply-high part. */
1842 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1843 if (!mulh_stmt_info)
1844 return NULL;
1846 /* Get the scaling term. */
1847 scale_term = gimple_assign_rhs2 (plus_input_stmt);
1849 expect_offset = target_precision + 2;
1850 ifn = IFN_MULHRS;
1852 else
1854 mulh_stmt_info = rshift_input_stmt_info;
1855 scale_term = gimple_assign_rhs2 (last_stmt);
1857 expect_offset = target_precision + 1;
1858 ifn = IFN_MULHS;
1861 /* Check that the scaling factor is correct. */
1862 if (TREE_CODE (scale_term) != INTEGER_CST
1863 || wi::to_widest (scale_term) + expect_offset
1864 != TYPE_PRECISION (lhs_type))
1865 return NULL;
1867 /* Check whether the scaling input term can be seen as two widened
1868 inputs multiplied together. */
1869 vect_unpromoted_value unprom_mult[2];
1870 tree new_type;
1871 unsigned int nops
1872 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1873 false, 2, unprom_mult, &new_type);
1874 if (nops != 2)
1875 return NULL;
1877 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1879 /* Adjust output precision. */
1880 if (TYPE_PRECISION (new_type) < target_precision)
1881 new_type = build_nonstandard_integer_type
1882 (target_precision, TYPE_UNSIGNED (new_type));
1884 /* Check for target support. */
1885 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1886 if (!new_vectype
1887 || !direct_internal_fn_supported_p
1888 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1889 return NULL;
1891 /* The IR requires a valid vector type for the cast result, even though
1892 it's likely to be discarded. */
1893 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
1894 if (!*type_out)
1895 return NULL;
1897 /* Generate the IFN_MULHS or IFN_MULHRS call. */
1898 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1899 tree new_ops[2];
1900 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
1901 unprom_mult, new_vectype);
1902 gcall *mulhrs_stmt
1903 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1904 gimple_call_set_lhs (mulhrs_stmt, new_var);
1905 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1907 if (dump_enabled_p ())
1908 dump_printf_loc (MSG_NOTE, vect_location,
1909 "created pattern stmt: %G", mulhrs_stmt);
1911 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
1912 mulhrs_stmt, new_vectype);
1915 /* Recognize the patterns:
1917 ATYPE a; // narrower than TYPE
1918 BTYPE b; // narrower than TYPE
1919 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1920 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1922 where only the bottom half of avg is used. Try to transform them into:
1924 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1925 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1927 followed by:
1929 TYPE avg = (TYPE) avg';
1931 where NTYPE is no wider than half of TYPE. Since only the bottom half
1932 of avg is used, all or part of the cast of avg' should become redundant.
1934 If there is no target support available, generate code to distribute rshift
1935 over plus and add a carry. */
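/* For illustration, a scalar loop with the shape of pattern (2)
   (a sketch; the function and variable names are made up):

     void
     f (unsigned char *restrict r, const unsigned char *restrict a,
        const unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         r[i] = (a[i] + b[i] + 1) >> 1;
     }

   The additions happen in int after promotion, but only the low 8 bits
   of each result are stored, so the sequence can become an
   IFN_AVG_CEIL on the narrow type.  */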
1937 static gimple *
1938 vect_recog_average_pattern (vec_info *vinfo,
1939 stmt_vec_info last_stmt_info, tree *type_out)
1941 /* Check for a shift right by one bit. */
1942 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1943 if (!last_stmt
1944 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1945 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1946 return NULL;
1948 /* Check that the shift result is wider than the users of the
1949 result need (i.e. that narrowing would be a natural choice). */
1950 tree lhs = gimple_assign_lhs (last_stmt);
1951 tree type = TREE_TYPE (lhs);
1952 unsigned int target_precision
1953 = vect_element_precision (last_stmt_info->min_output_precision);
1954 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1955 return NULL;
1957 /* Look through any change in sign on the shift input. */
1958 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1959 vect_unpromoted_value unprom_plus;
1960 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1961 &unprom_plus);
1962 if (!rshift_rhs
1963 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1964 return NULL;
1966 /* Get the definition of the shift input. */
1967 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1968 if (!plus_stmt_info)
1969 return NULL;
1971 /* Check whether the shift input can be seen as a tree of additions on
1972 2 or 3 widened inputs.
1974 Note that the pattern should be a win even if the result of one or
1975 more additions is reused elsewhere: if the pattern matches, we'd be
1976 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1977 internal_fn ifn = IFN_AVG_FLOOR;
1978 vect_unpromoted_value unprom[3];
1979 tree new_type;
1980 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
1981 PLUS_EXPR, false, 3,
1982 unprom, &new_type);
1983 if (nops == 0)
1984 return NULL;
1985 if (nops == 3)
1987 /* Check that one operand is 1. */
1988 unsigned int i;
1989 for (i = 0; i < 3; ++i)
1990 if (integer_onep (unprom[i].op))
1991 break;
1992 if (i == 3)
1993 return NULL;
1994 /* Throw away the 1 operand and keep the other two. */
1995 if (i < 2)
1996 unprom[i] = unprom[2];
1997 ifn = IFN_AVG_CEIL;
2000 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2002 /* We know that:
2004 (a) the operation can be viewed as:
2006 TYPE widened0 = (TYPE) UNPROM[0];
2007 TYPE widened1 = (TYPE) UNPROM[1];
2008 TYPE tmp1 = widened0 + widened1 {+ 1};
2009 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2011 (b) the first two statements are equivalent to:
2013 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2014 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2016 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2017 where sensible;
2019 (d) all the operations can be performed correctly at twice the width of
2020 NEW_TYPE, due to the nature of the average operation; and
2022 (e) users of the result of the right shift need only TARGET_PRECISION
2023 bits, where TARGET_PRECISION is no more than half of TYPE's
2024 precision.
2026 Under these circumstances, the only situation in which NEW_TYPE
2027 could be narrower than TARGET_PRECISION is if widened0, widened1
2028 and an addition result are all used more than once. Thus we can
2029 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2030 as "free", whereas widening the result of the average instruction
2031 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2032 therefore better not to go narrower than TARGET_PRECISION. */
2033 if (TYPE_PRECISION (new_type) < target_precision)
2034 new_type = build_nonstandard_integer_type (target_precision,
2035 TYPE_UNSIGNED (new_type));
2037 /* Check for target support. */
2038 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2039 if (!new_vectype)
2040 return NULL;
2042 bool fallback_p = false;
2044 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2046 else if (TYPE_UNSIGNED (new_type)
2047 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2048 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2049 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2050 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2051 fallback_p = true;
2052 else
2053 return NULL;
2055 /* The IR requires a valid vector type for the cast result, even though
2056 it's likely to be discarded. */
2057 *type_out = get_vectype_for_scalar_type (vinfo, type);
2058 if (!*type_out)
2059 return NULL;
2061 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2062 tree new_ops[2];
2063 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2064 unprom, new_vectype);
2066 if (fallback_p)
2068 /* As a fallback, generate code for the following sequence:
2070 shifted_op0 = new_ops[0] >> 1;
2071 shifted_op1 = new_ops[1] >> 1;
2072 sum_of_shifted = shifted_op0 + shifted_op1;
2073 unmasked_carry = new_ops[0] and/or new_ops[1];
2074 carry = unmasked_carry & 1;
2075 new_var = sum_of_shifted + carry;
2078 tree one_cst = build_one_cst (new_type);
2079 gassign *g;
2081 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2082 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2083 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2085 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2086 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2087 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2089 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2090 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2091 shifted_op0, shifted_op1);
2092 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2094 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2095 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2096 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2097 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2099 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2100 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2101 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2103 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2104 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2107 /* Generate the IFN_AVG* call. */
2108 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2109 new_ops[1]);
2110 gimple_call_set_lhs (average_stmt, new_var);
2111 gimple_set_location (average_stmt, gimple_location (last_stmt));
2113 if (dump_enabled_p ())
2114 dump_printf_loc (MSG_NOTE, vect_location,
2115 "created pattern stmt: %G", average_stmt);
2117 return vect_convert_output (vinfo, last_stmt_info,
2118 type, average_stmt, new_vectype);
2121 /* Recognize cases in which the input to a cast is wider than its
2122 output, and the input is fed by a widening operation. Fold this
2123 by removing the unnecessary intermediate widening. E.g.:
2125 unsigned char a;
2126 unsigned int b = (unsigned int) a;
2127 unsigned short c = (unsigned short) b;
2131 unsigned short c = (unsigned short) a;
2133 Although this is rare in input IR, it is an expected side-effect
2134 of the over-widening pattern above.
2136 This is beneficial also for integer-to-float conversions, if the
2137 widened integer has more bits than the float, and if the unwidened
2138 input doesn't. */
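/* An illustrative sketch of the integer-to-float case mentioned above:

     unsigned char a;
     unsigned long long b = (unsigned long long) a;
     float c = (float) b;

   can become

     float c = (float) a;

   since the widened integer (64 bits) has more bits than float while
   the original unsigned char input does not.  */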
2140 static gimple *
2141 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2142 stmt_vec_info last_stmt_info, tree *type_out)
2144 /* Check for a cast, including an integer-to-float conversion. */
2145 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2146 if (!last_stmt)
2147 return NULL;
2148 tree_code code = gimple_assign_rhs_code (last_stmt);
2149 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2150 return NULL;
2152 /* Make sure that the rhs is a scalar with a natural bitsize. */
2153 tree lhs = gimple_assign_lhs (last_stmt);
2154 if (!lhs)
2155 return NULL;
2156 tree lhs_type = TREE_TYPE (lhs);
2157 scalar_mode lhs_mode;
2158 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2159 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2160 return NULL;
2162 /* Check for a narrowing operation (from a vector point of view). */
2163 tree rhs = gimple_assign_rhs1 (last_stmt);
2164 tree rhs_type = TREE_TYPE (rhs);
2165 if (!INTEGRAL_TYPE_P (rhs_type)
2166 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2167 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2168 return NULL;
2170 /* Try to find an unpromoted input. */
2171 vect_unpromoted_value unprom;
2172 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2173 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2174 return NULL;
2176 /* If the bits above RHS_TYPE matter, make sure that they're the
2177 same when extending from UNPROM as they are when extending from RHS. */
2178 if (!INTEGRAL_TYPE_P (lhs_type)
2179 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2180 return NULL;
2182 /* We can get the same result by casting UNPROM directly, to avoid
2183 the unnecessary widening and narrowing. */
2184 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2186 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2187 if (!*type_out)
2188 return NULL;
2190 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2191 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2192 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2194 return pattern_stmt;
2197 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2198 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2200 static gimple *
2201 vect_recog_widen_shift_pattern (vec_info *vinfo,
2202 stmt_vec_info last_stmt_info, tree *type_out)
2204 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2205 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2206 "vect_recog_widen_shift_pattern");
2209 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
2211 type a_t, b_t, c_t;
2213 S0 a_t = b_t r<< c_t;
2215 Input/Output:
2217 * STMT_VINFO: The stmt from which the pattern search begins,
2218 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2219 with a sequence:
2221 S1 d_t = -c_t;
2222 S2 e_t = d_t & (B - 1);
2223 S3 f_t = b_t << c_t;
2224 S4 g_t = b_t >> e_t;
2225 S0 a_t = f_t | g_t;
2227 where B is the element bitsize of type.
2229 Output:
2231 * TYPE_OUT: The type of the output of this pattern.
2233 * Return value: A new stmt that will be used to replace the rotate
2234 S0 stmt. */
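/* For illustration, the equivalent source-level sequence for a 32-bit
   left rotate, assuming 0 <= c_t < 32 (a sketch; B == 32 here, and the
   function name is made up):

     unsigned int
     rotl (unsigned int b_t, unsigned int c_t)
     {
       unsigned int d_t = -c_t;
       unsigned int e_t = d_t & 31;
       unsigned int f_t = b_t << c_t;
       unsigned int g_t = b_t >> e_t;
       return f_t | g_t;
     }

   This mirrors statements S1-S4 and S0 above.  */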
2236 static gimple *
2237 vect_recog_rotate_pattern (vec_info *vinfo,
2238 stmt_vec_info stmt_vinfo, tree *type_out)
2240 gimple *last_stmt = stmt_vinfo->stmt;
2241 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2242 gimple *pattern_stmt, *def_stmt;
2243 enum tree_code rhs_code;
2244 enum vect_def_type dt;
2245 optab optab1, optab2;
2246 edge ext_def = NULL;
2247 bool bswap16_p = false;
2249 if (is_gimple_assign (last_stmt))
2251 rhs_code = gimple_assign_rhs_code (last_stmt);
2252 switch (rhs_code)
2254 case LROTATE_EXPR:
2255 case RROTATE_EXPR:
2256 break;
2257 default:
2258 return NULL;
2261 lhs = gimple_assign_lhs (last_stmt);
2262 oprnd0 = gimple_assign_rhs1 (last_stmt);
2263 type = TREE_TYPE (oprnd0);
2264 oprnd1 = gimple_assign_rhs2 (last_stmt);
2266 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2268 /* __builtin_bswap16 (x) is another form of x r>> 8.
2269 The vectorizer has bswap support, but only if the argument isn't
2270 promoted. */
2271 lhs = gimple_call_lhs (last_stmt);
2272 oprnd0 = gimple_call_arg (last_stmt, 0);
2273 type = TREE_TYPE (oprnd0);
2274 if (!lhs
2275 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2276 || TYPE_PRECISION (type) <= 16
2277 || TREE_CODE (oprnd0) != SSA_NAME
2278 || BITS_PER_UNIT != 8
2279 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2280 return NULL;
2282 stmt_vec_info def_stmt_info;
2283 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2284 return NULL;
2286 if (dt != vect_internal_def)
2287 return NULL;
2289 if (gimple_assign_cast_p (def_stmt))
2291 def = gimple_assign_rhs1 (def_stmt);
2292 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2293 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2294 oprnd0 = def;
2297 type = TREE_TYPE (lhs);
2298 vectype = get_vectype_for_scalar_type (vinfo, type);
2299 if (vectype == NULL_TREE)
2300 return NULL;
2302 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2304 /* The encoding uses one stepped pattern for each byte in the
2305 16-bit word. */
2306 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2307 for (unsigned i = 0; i < 3; ++i)
2308 for (unsigned j = 0; j < 2; ++j)
2309 elts.quick_push ((i + 1) * 2 - j - 1);
2311 vec_perm_indices indices (elts, 1,
2312 TYPE_VECTOR_SUBPARTS (char_vectype));
2313 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2315 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2316 undo the argument promotion. */
2317 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2319 def = vect_recog_temp_ssa_var (type, NULL);
2320 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2321 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2322 oprnd0 = def;
2325 /* Pattern detected. */
2326 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2328 *type_out = vectype;
2330 /* Pattern supported. Create a stmt to be used to replace the
2331 pattern, with the unpromoted argument. */
2332 var = vect_recog_temp_ssa_var (type, NULL);
2333 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2334 1, oprnd0);
2335 gimple_call_set_lhs (pattern_stmt, var);
2336 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2337 gimple_call_fntype (last_stmt));
2338 return pattern_stmt;
2342 oprnd1 = build_int_cst (integer_type_node, 8);
2343 rhs_code = LROTATE_EXPR;
2344 bswap16_p = true;
2346 else
2347 return NULL;
2349 if (TREE_CODE (oprnd0) != SSA_NAME
2350 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2351 || !INTEGRAL_TYPE_P (type)
2352 || !TYPE_UNSIGNED (type))
2353 return NULL;
2355 stmt_vec_info def_stmt_info;
2356 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2357 return NULL;
2359 if (dt != vect_internal_def
2360 && dt != vect_constant_def
2361 && dt != vect_external_def)
2362 return NULL;
2364 vectype = get_vectype_for_scalar_type (vinfo, type);
2365 if (vectype == NULL_TREE)
2366 return NULL;
2368 /* If vector/vector or vector/scalar rotate is supported by the target,
2369 don't do anything here. */
2370 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2371 if (optab1
2372 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2374 use_rotate:
2375 if (bswap16_p)
2377 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2379 def = vect_recog_temp_ssa_var (type, NULL);
2380 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2381 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2382 oprnd0 = def;
2385 /* Pattern detected. */
2386 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2388 *type_out = vectype;
2390 /* Pattern supported. Create a stmt to be used to replace the
2391 pattern. */
2392 var = vect_recog_temp_ssa_var (type, NULL);
2393 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2394 oprnd1);
2395 return pattern_stmt;
2397 return NULL;
2400 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2402 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2403 if (optab2
2404 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2405 goto use_rotate;
2408 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2409 don't do anything here either. */
2410 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2411 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2412 if (!optab1
2413 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2414 || !optab2
2415 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2417 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2418 return NULL;
2419 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2420 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2421 if (!optab1
2422 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2423 || !optab2
2424 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2425 return NULL;
2428 *type_out = vectype;
2430 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2432 def = vect_recog_temp_ssa_var (type, NULL);
2433 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2434 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2435 oprnd0 = def;
2438 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2439 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2441 def = NULL_TREE;
2442 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2443 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2444 def = oprnd1;
2445 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2447 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2448 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2449 && TYPE_PRECISION (TREE_TYPE (rhs1))
2450 == TYPE_PRECISION (type))
2451 def = rhs1;
2454 if (def == NULL_TREE)
2456 def = vect_recog_temp_ssa_var (type, NULL);
2457 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2458 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2460 stype = TREE_TYPE (def);
2462 if (TREE_CODE (def) == INTEGER_CST)
2464 if (!tree_fits_uhwi_p (def)
2465 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2466 || integer_zerop (def))
2467 return NULL;
2468 def2 = build_int_cst (stype,
2469 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2471 else
2473 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2475 if (vecstype == NULL_TREE)
2476 return NULL;
2477 def2 = vect_recog_temp_ssa_var (stype, NULL);
2478 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2479 if (ext_def)
2481 basic_block new_bb
2482 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2483 gcc_assert (!new_bb);
2485 else
2486 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2488 def2 = vect_recog_temp_ssa_var (stype, NULL);
2489 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2490 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2491 gimple_assign_lhs (def_stmt), mask);
2492 if (ext_def)
2494 basic_block new_bb
2495 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2496 gcc_assert (!new_bb);
2498 else
2499 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2502 var1 = vect_recog_temp_ssa_var (type, NULL);
2503 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2504 ? LSHIFT_EXPR : RSHIFT_EXPR,
2505 oprnd0, def);
2506 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2508 var2 = vect_recog_temp_ssa_var (type, NULL);
2509 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2510 ? RSHIFT_EXPR : LSHIFT_EXPR,
2511 oprnd0, def2);
2512 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2514 /* Pattern detected. */
2515 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2517 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2518 var = vect_recog_temp_ssa_var (type, NULL);
2519 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2521 return pattern_stmt;
2524 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2525 vectorized:
2527 type a_t;
2528 TYPE b_T, res_T;
2530 S1 a_t = ;
2531 S2 b_T = ;
2532 S3 res_T = b_T op a_t;
2534 where type 'TYPE' is a type with a different size than 'type',
2535 and op is <<, >> or rotate.
2537 Also detect cases:
2539 type a_t;
2540 TYPE b_T, c_T, res_T;
2542 S0 c_T = ;
2543 S1 a_t = (type) c_T;
2544 S2 b_T = ;
2545 S3 res_T = b_T op a_t;
2547 Input/Output:
2549 * STMT_VINFO: The stmt from which the pattern search begins,
2550 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2551 with a shift/rotate which has same type on both operands, in the
2552 second case just b_T op c_T, in the first case with added cast
2553 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2555 Output:
2557 * TYPE_OUT: The type of the output of this pattern.
2559 * Return value: A new stmt that will be used to replace the shift/rotate
2560 S3 stmt. */
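/* For illustration, with type = unsigned char and TYPE = int
   (a sketch in the same pseudo-C notation as above; a_T is a made-up
   name for the new temporary):

     unsigned char a_t = ...;
     int b_T = ...;
     int res_T = b_T << a_t;

   The pattern adds a_T = (int) a_t to the pattern definition sequence
   and rewrites S3 as res_T = b_T << a_T, so that both operands of the
   vector shift have the same width.  */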
2562 static gimple *
2563 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2564 stmt_vec_info stmt_vinfo,
2565 tree *type_out)
2567 gimple *last_stmt = stmt_vinfo->stmt;
2568 tree oprnd0, oprnd1, lhs, var;
2569 gimple *pattern_stmt;
2570 enum tree_code rhs_code;
2572 if (!is_gimple_assign (last_stmt))
2573 return NULL;
2575 rhs_code = gimple_assign_rhs_code (last_stmt);
2576 switch (rhs_code)
2578 case LSHIFT_EXPR:
2579 case RSHIFT_EXPR:
2580 case LROTATE_EXPR:
2581 case RROTATE_EXPR:
2582 break;
2583 default:
2584 return NULL;
2587 lhs = gimple_assign_lhs (last_stmt);
2588 oprnd0 = gimple_assign_rhs1 (last_stmt);
2589 oprnd1 = gimple_assign_rhs2 (last_stmt);
2590 if (TREE_CODE (oprnd0) != SSA_NAME
2591 || TREE_CODE (oprnd1) != SSA_NAME
2592 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2593 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2594 || TYPE_PRECISION (TREE_TYPE (lhs))
2595 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2596 return NULL;
2598 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2599 if (!def_vinfo)
2600 return NULL;
2602 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2603 if (*type_out == NULL_TREE)
2604 return NULL;
2606 tree def = NULL_TREE;
2607 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2608 if (def_stmt && gimple_assign_cast_p (def_stmt))
2610 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2611 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2612 && TYPE_PRECISION (TREE_TYPE (rhs1))
2613 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2615 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2616 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2617 def = rhs1;
2618 else
2620 tree mask
2621 = build_low_bits_mask (TREE_TYPE (rhs1),
2622 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2623 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2624 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2625 tree vecstype = get_vectype_for_scalar_type (vinfo,
2626 TREE_TYPE (rhs1));
2627 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2632 if (def == NULL_TREE)
2634 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2635 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2636 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2639 /* Pattern detected. */
2640 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2642 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2643 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2644 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2646 return pattern_stmt;
2649 /* Return true iff the target has a vector optab implementing the operation
2650 CODE on type VECTYPE. */
2652 static bool
2653 target_has_vecop_for_code (tree_code code, tree vectype)
2655 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2656 return voptab
2657 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2660 /* Verify that the target has optabs of VECTYPE to perform all the steps
2661 needed by the multiplication-by-immediate synthesis algorithm described by
2662 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2663 present. Return true iff the target supports all the steps. */
2665 static bool
2666 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2667 tree vectype, bool synth_shift_p)
2669 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2670 return false;
2672 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2673 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2675 if (var == negate_variant
2676 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2677 return false;
2679 /* If we must synthesize shifts with additions make sure that vector
2680 addition is available. */
2681 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2682 return false;
2684 for (int i = 1; i < alg->ops; i++)
2686 switch (alg->op[i])
2688 case alg_shift:
2689 break;
2690 case alg_add_t_m2:
2691 case alg_add_t2_m:
2692 case alg_add_factor:
2693 if (!supports_vplus)
2694 return false;
2695 break;
2696 case alg_sub_t_m2:
2697 case alg_sub_t2_m:
2698 case alg_sub_factor:
2699 if (!supports_vminus)
2700 return false;
2701 break;
2702 case alg_unknown:
2703 case alg_m:
2704 case alg_zero:
2705 case alg_impossible:
2706 return false;
2707 default:
2708 gcc_unreachable ();
2712 return true;
2715 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2716 putting the final result in DEST. Append all statements but the last into
2717 VINFO. Return the last statement. */
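/* E.g. for AMNT == 3 the generated sequence is, schematically:

     t1 = op + op;         (op << 1)
     t2 = t1 + t1;         (op << 2)
     dest = t2 + t2;       (op << 3, returned rather than appended)  */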
2719 static gimple *
2720 synth_lshift_by_additions (vec_info *vinfo,
2721 tree dest, tree op, HOST_WIDE_INT amnt,
2722 stmt_vec_info stmt_info)
2724 HOST_WIDE_INT i;
2725 tree itype = TREE_TYPE (op);
2726 tree prev_res = op;
2727 gcc_assert (amnt >= 0);
2728 for (i = 0; i < amnt; i++)
2730 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2731 : dest;
2732 gimple *stmt
2733 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2734 prev_res = tmp_var;
2735 if (i < amnt - 1)
2736 append_pattern_def_seq (vinfo, stmt_info, stmt);
2737 else
2738 return stmt;
2740 gcc_unreachable ();
2741 return NULL;
2744 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2745 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2746 the process if necessary. Append the resulting assignment statements
2747 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2748 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2749 left shifts using additions. */
2751 static tree
2752 apply_binop_and_append_stmt (vec_info *vinfo,
2753 tree_code code, tree op1, tree op2,
2754 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2756 if (integer_zerop (op2)
2757 && (code == LSHIFT_EXPR
2758 || code == PLUS_EXPR))
2760 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2761 return op1;
2764 gimple *stmt;
2765 tree itype = TREE_TYPE (op1);
2766 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2768 if (code == LSHIFT_EXPR
2769 && synth_shift_p)
2771 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
2772 TREE_INT_CST_LOW (op2), stmt_vinfo);
2773 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2774 return tmp_var;
2777 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2778 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2779 return tmp_var;
2782 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2783 and simple arithmetic operations to be vectorized. Record the statements
2784 produced in STMT_VINFO and return the last statement in the sequence or
2785 NULL if it's not possible to synthesize such a multiplication.
2786 This function mirrors the behavior of expand_mult_const in expmed.c but
2787 works on tree-ssa form. */
2789 static gimple *
2790 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
2791 stmt_vec_info stmt_vinfo)
2793 tree itype = TREE_TYPE (op);
2794 machine_mode mode = TYPE_MODE (itype);
2795 struct algorithm alg;
2796 mult_variant variant;
2797 if (!tree_fits_shwi_p (val))
2798 return NULL;
2800 /* Multiplication synthesis by shifts, adds and subs can introduce
2801 signed overflow where the original operation didn't. Perform the
2802 operations on an unsigned type and cast back to avoid this.
2803 In the future we may want to relax this for synthesis algorithms
2804 that we can prove do not cause unexpected overflow. */
2805 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2807 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2809 /* Targets that don't support vector shifts but support vector additions
2810 can synthesize shifts that way. */
2811 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
2813 HOST_WIDE_INT hwval = tree_to_shwi (val);
2814 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2815 The vectorizer's benefit analysis will decide whether it's beneficial
2816 to do this. */
2817 bool possible = choose_mult_variant (mode, hwval, &alg,
2818 &variant, MAX_COST);
2819 if (!possible)
2820 return NULL;
2822 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
2824 if (!vectype
2825 || !target_supports_mult_synth_alg (&alg, variant,
2826 vectype, synth_shift_p))
2827 return NULL;
2829 tree accumulator;
2831 /* Clear out the sequence of statements so we can populate it below. */
2832 gimple *stmt = NULL;
2834 if (cast_to_unsigned_p)
2836 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2837 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2838 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2839 op = tmp_op;
2842 if (alg.op[0] == alg_zero)
2843 accumulator = build_int_cst (multtype, 0);
2844 else
2845 accumulator = op;
2847 bool needs_fixup = (variant == negate_variant)
2848 || (variant == add_variant);
2850 for (int i = 1; i < alg.ops; i++)
2852 tree shft_log = build_int_cst (multtype, alg.log[i]);
2853 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2854 tree tmp_var = NULL_TREE;
2856 switch (alg.op[i])
2858 case alg_shift:
2859 if (synth_shift_p)
2860 stmt
2861 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
2862 alg.log[i], stmt_vinfo);
2863 else
2864 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2865 shft_log);
2866 break;
2867 case alg_add_t_m2:
2868 tmp_var
2869 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
2870 stmt_vinfo, synth_shift_p);
2871 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2872 tmp_var);
2873 break;
2874 case alg_sub_t_m2:
2875 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
2876 shft_log, stmt_vinfo,
2877 synth_shift_p);
2878 /* In some algorithms the first step involves zeroing the
2880 accumulator. If subtracting from such an accumulator,
2881 just emit the negation directly.
2881 if (integer_zerop (accumulator))
2882 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2883 else
2884 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2885 tmp_var);
2886 break;
2887 case alg_add_t2_m:
2888 tmp_var
2889 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2890 shft_log, stmt_vinfo, synth_shift_p);
2891 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2892 break;
2893 case alg_sub_t2_m:
2894 tmp_var
2895 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2896 shft_log, stmt_vinfo, synth_shift_p);
2897 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2898 break;
2899 case alg_add_factor:
2900 tmp_var
2901 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2902 shft_log, stmt_vinfo, synth_shift_p);
2903 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2904 tmp_var);
2905 break;
2906 case alg_sub_factor:
2907 tmp_var
2908 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2909 shft_log, stmt_vinfo, synth_shift_p);
2910 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2911 accumulator);
2912 break;
2913 default:
2914 gcc_unreachable ();
2916 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2917 but rather return it directly. */
2919 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2920 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2921 accumulator = accum_tmp;
2923 if (variant == negate_variant)
2925 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2926 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2927 accumulator = accum_tmp;
2928 if (cast_to_unsigned_p)
2929 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2931 else if (variant == add_variant)
2933 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2934 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2935 accumulator = accum_tmp;
2936 if (cast_to_unsigned_p)
2937 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2939 /* Move back to a signed type if needed. */
2940 if (cast_to_unsigned_p)
2942 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2943 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2946 return stmt;
2949 /* Detect multiplication by constant and convert it into a sequence of
2950 shifts and additions, subtractions, negations. We reuse the
2951 choose_mult_variant algorithms from expmed.c.
2953 Input/Output:
2955 STMT_VINFO: The stmt from which the pattern search begins,
2956 i.e. the mult stmt.
2958 Output:
2960 * TYPE_OUT: The type of the output of this pattern.
2962 * Return value: A new stmt that will be used to replace
2963 the multiplication. */
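/* For illustration (a sketch; the exact sequence is whatever
   choose_mult_variant selects for the target):

     a_t = b_t * 9;

   might become

     t1_t = b_t << 3;
     a_t = t1_t + b_t;

   using only shifts and additions that the target can vectorize.  */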
2965 static gimple *
2966 vect_recog_mult_pattern (vec_info *vinfo,
2967 stmt_vec_info stmt_vinfo, tree *type_out)
2969 gimple *last_stmt = stmt_vinfo->stmt;
2970 tree oprnd0, oprnd1, vectype, itype;
2971 gimple *pattern_stmt;
2973 if (!is_gimple_assign (last_stmt))
2974 return NULL;
2976 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2977 return NULL;
2979 oprnd0 = gimple_assign_rhs1 (last_stmt);
2980 oprnd1 = gimple_assign_rhs2 (last_stmt);
2981 itype = TREE_TYPE (oprnd0);
2983 if (TREE_CODE (oprnd0) != SSA_NAME
2984 || TREE_CODE (oprnd1) != INTEGER_CST
2985 || !INTEGRAL_TYPE_P (itype)
2986 || !type_has_mode_precision_p (itype))
2987 return NULL;
2989 vectype = get_vectype_for_scalar_type (vinfo, itype);
2990 if (vectype == NULL_TREE)
2991 return NULL;
2993 /* If the target can handle vectorized multiplication natively,
2994 don't attempt to optimize this. */
2995 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2996 if (mul_optab != unknown_optab)
2998 machine_mode vec_mode = TYPE_MODE (vectype);
2999 int icode = (int) optab_handler (mul_optab, vec_mode);
3000 if (icode != CODE_FOR_nothing)
3001 return NULL;
3004 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3005 oprnd0, oprnd1, stmt_vinfo);
3006 if (!pattern_stmt)
3007 return NULL;
3009 /* Pattern detected. */
3010 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3012 *type_out = vectype;
3014 return pattern_stmt;
3017 /* Detect a signed division by a constant that wouldn't be
3018 otherwise vectorized:
3020 type a_t, b_t;
3022 S1 a_t = b_t / N;
3024 where type 'type' is an integral type and N is a constant.
3026 Similarly handle modulo by a constant:
3028 S4 a_t = b_t % N;
3030 Input/Output:
3032 * STMT_VINFO: The stmt from which the pattern search begins,
3033 i.e. the division stmt. If N is a power of two constant and
3034 type is signed, S1 is replaced by:
3035 S3 y_t = b_t < 0 ? N - 1 : 0;
3036 S2 x_t = b_t + y_t;
3037 S1' a_t = x_t >> log2 (N);
3039 Similarly, if N is a power of two constant and type is signed,
3040 S4 is replaced by (where *_T temporaries have unsigned type):
3041 S9 y_T = b_t < 0 ? -1U : 0U;
3042 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3043 S7 z_t = (type) z_T;
3044 S6 w_t = b_t + z_t;
3045 S5 x_t = w_t & (N - 1);
3046 S4' a_t = x_t - z_t;
3048 Output:
3050 * TYPE_OUT: The type of the output of this pattern.
3052 * Return value: A new stmt that will be used to replace the division
3053 S1 or modulo S4 stmt. */
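/* For illustration, with N = 4 and a signed type, the division S1

     a_t = b_t / 4;

   becomes, following the sequence above:

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;  */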
3055 static gimple *
3056 vect_recog_divmod_pattern (vec_info *vinfo,
3057 stmt_vec_info stmt_vinfo, tree *type_out)
3059 gimple *last_stmt = stmt_vinfo->stmt;
3060 tree oprnd0, oprnd1, vectype, itype, cond;
3061 gimple *pattern_stmt, *def_stmt;
3062 enum tree_code rhs_code;
3063 optab optab;
3064 tree q;
3065 int dummy_int, prec;
3067 if (!is_gimple_assign (last_stmt))
3068 return NULL;
3070 rhs_code = gimple_assign_rhs_code (last_stmt);
3071 switch (rhs_code)
3073 case TRUNC_DIV_EXPR:
3074 case EXACT_DIV_EXPR:
3075 case TRUNC_MOD_EXPR:
3076 break;
3077 default:
3078 return NULL;
3081 oprnd0 = gimple_assign_rhs1 (last_stmt);
3082 oprnd1 = gimple_assign_rhs2 (last_stmt);
3083 itype = TREE_TYPE (oprnd0);
3084 if (TREE_CODE (oprnd0) != SSA_NAME
3085 || TREE_CODE (oprnd1) != INTEGER_CST
3086 || TREE_CODE (itype) != INTEGER_TYPE
3087 || !type_has_mode_precision_p (itype))
3088 return NULL;
3090 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3091 vectype = get_vectype_for_scalar_type (vinfo, itype);
3092 if (vectype == NULL_TREE)
3093 return NULL;
3095 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3097 /* If the target can handle vectorized division or modulo natively,
3098 don't attempt to optimize this, since native division is likely
3099 to give smaller code. */
3100 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3101 if (optab != unknown_optab)
3103 machine_mode vec_mode = TYPE_MODE (vectype);
3104 int icode = (int) optab_handler (optab, vec_mode);
3105 if (icode != CODE_FOR_nothing)
3106 return NULL;
3110 prec = TYPE_PRECISION (itype);
3111 if (integer_pow2p (oprnd1))
3113 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3114 return NULL;
3116 /* Pattern detected. */
3117 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3119 *type_out = vectype;
3121 /* Check if the target supports this internal function. */
3122 internal_fn ifn = IFN_DIV_POW2;
3123 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3125 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3127 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3128 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3129 gimple_call_set_lhs (div_stmt, var_div);
3131 if (rhs_code == TRUNC_MOD_EXPR)
3133 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3134 def_stmt
3135 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3136 LSHIFT_EXPR, var_div, shift);
3137 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3138 pattern_stmt
3139 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3140 MINUS_EXPR, oprnd0,
3141 gimple_assign_lhs (def_stmt));
3143 else
3144 pattern_stmt = div_stmt;
3145 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3147 return pattern_stmt;
3150 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3151 build_int_cst (itype, 0));
3152 if (rhs_code == TRUNC_DIV_EXPR
3153 || rhs_code == EXACT_DIV_EXPR)
3155 tree var = vect_recog_temp_ssa_var (itype, NULL);
3156 tree shift;
3157 def_stmt
3158 = gimple_build_assign (var, COND_EXPR, cond,
3159 fold_build2 (MINUS_EXPR, itype, oprnd1,
3160 build_int_cst (itype, 1)),
3161 build_int_cst (itype, 0));
3162 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3163 var = vect_recog_temp_ssa_var (itype, NULL);
3164 def_stmt
3165 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3166 gimple_assign_lhs (def_stmt));
3167 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3169 shift = build_int_cst (itype, tree_log2 (oprnd1));
3170 pattern_stmt
3171 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3172 RSHIFT_EXPR, var, shift);
3174 else
3176 tree signmask;
3177 if (compare_tree_int (oprnd1, 2) == 0)
3179 signmask = vect_recog_temp_ssa_var (itype, NULL);
3180 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3181 build_int_cst (itype, 1),
3182 build_int_cst (itype, 0));
3183 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3185 else
3187 tree utype
3188 = build_nonstandard_integer_type (prec, 1);
3189 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3190 tree shift
3191 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3192 - tree_log2 (oprnd1));
3193 tree var = vect_recog_temp_ssa_var (utype, NULL);
3195 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3196 build_int_cst (utype, -1),
3197 build_int_cst (utype, 0));
3198 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3199 var = vect_recog_temp_ssa_var (utype, NULL);
3200 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3201 gimple_assign_lhs (def_stmt),
3202 shift);
3203 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3204 signmask = vect_recog_temp_ssa_var (itype, NULL);
3205 def_stmt
3206 = gimple_build_assign (signmask, NOP_EXPR, var);
3207 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3209 def_stmt
3210 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3211 PLUS_EXPR, oprnd0, signmask);
3212 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3213 def_stmt
3214 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3215 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3216 fold_build2 (MINUS_EXPR, itype, oprnd1,
3217 build_int_cst (itype, 1)));
3218 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3220 pattern_stmt
3221 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3222 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3223 signmask);
3226 return pattern_stmt;
3229 if (prec > HOST_BITS_PER_WIDE_INT
3230 || integer_zerop (oprnd1))
3231 return NULL;
3233 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3234 return NULL;
3236 if (TYPE_UNSIGNED (itype))
3238 unsigned HOST_WIDE_INT mh, ml;
3239 int pre_shift, post_shift;
3240 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3241 & GET_MODE_MASK (itype_mode));
3242 tree t1, t2, t3, t4;
3244 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3245 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3246 return NULL;
3248 /* Find a suitable multiplier and right shift count
3249 instead of multiplying with D. */
3250 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3252 /* If the suggested multiplier is more than SIZE bits, we can do better
3253 for even divisors, using an initial right shift. */
3254 if (mh != 0 && (d & 1) == 0)
3256 pre_shift = ctz_or_zero (d);
3257 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3258 &ml, &post_shift, &dummy_int);
3259 gcc_assert (!mh);
3261 else
3262 pre_shift = 0;
3264 if (mh != 0)
3266 if (post_shift - 1 >= prec)
3267 return NULL;
3269 /* t1 = oprnd0 h* ml;
3270 t2 = oprnd0 - t1;
3271 t3 = t2 >> 1;
3272 t4 = t1 + t3;
3273 q = t4 >> (post_shift - 1); */
3274 t1 = vect_recog_temp_ssa_var (itype, NULL);
3275 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3276 build_int_cst (itype, ml));
3277 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3279 t2 = vect_recog_temp_ssa_var (itype, NULL);
3280 def_stmt
3281 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3282 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3284 t3 = vect_recog_temp_ssa_var (itype, NULL);
3285 def_stmt
3286 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3287 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3289 t4 = vect_recog_temp_ssa_var (itype, NULL);
3290 def_stmt
3291 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3293 if (post_shift != 1)
3295 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3297 q = vect_recog_temp_ssa_var (itype, NULL);
3298 pattern_stmt
3299 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3300 build_int_cst (itype, post_shift - 1));
3302 else
3304 q = t4;
3305 pattern_stmt = def_stmt;
3308 else
3310 if (pre_shift >= prec || post_shift >= prec)
3311 return NULL;
3313 /* t1 = oprnd0 >> pre_shift;
3314 t2 = t1 h* ml;
3315 q = t2 >> post_shift; */
3316 if (pre_shift)
3318 t1 = vect_recog_temp_ssa_var (itype, NULL);
3319 def_stmt
3320 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3321 build_int_cst (NULL, pre_shift));
3322 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3324 else
3325 t1 = oprnd0;
3327 t2 = vect_recog_temp_ssa_var (itype, NULL);
3328 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3329 build_int_cst (itype, ml));
3331 if (post_shift)
3333 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3335 q = vect_recog_temp_ssa_var (itype, NULL);
3336 def_stmt
3337 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3338 build_int_cst (itype, post_shift));
3340 else
3341 q = t2;
3343 pattern_stmt = def_stmt;
3346 else
3348 unsigned HOST_WIDE_INT ml;
3349 int post_shift;
3350 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3351 unsigned HOST_WIDE_INT abs_d;
3352 bool add = false;
3353 tree t1, t2, t3, t4;
3355 /* Give up for -1. */
3356 if (d == -1)
3357 return NULL;
3359 /* Since d might be INT_MIN, we have to cast to
3360 unsigned HOST_WIDE_INT before negating to avoid
3361 undefined signed overflow. */
3362 abs_d = (d >= 0
3363 ? (unsigned HOST_WIDE_INT) d
3364 : - (unsigned HOST_WIDE_INT) d);
3366 /* n rem d = n rem -d */
3367 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3369 d = abs_d;
3370 oprnd1 = build_int_cst (itype, abs_d);
3372 if (HOST_BITS_PER_WIDE_INT >= prec
3373 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3374 /* This case is not handled correctly below. */
3375 return NULL;
3377 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3378 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3380 add = true;
3381 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3383 if (post_shift >= prec)
3384 return NULL;
3386 /* t1 = oprnd0 h* ml; */
3387 t1 = vect_recog_temp_ssa_var (itype, NULL);
3388 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3389 build_int_cst (itype, ml));
3391 if (add)
3393 /* t2 = t1 + oprnd0; */
3394 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3395 t2 = vect_recog_temp_ssa_var (itype, NULL);
3396 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3398 else
3399 t2 = t1;
3401 if (post_shift)
3403 /* t3 = t2 >> post_shift; */
3404 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3405 t3 = vect_recog_temp_ssa_var (itype, NULL);
3406 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3407 build_int_cst (itype, post_shift));
3409 else
3410 t3 = t2;
3412 wide_int oprnd0_min, oprnd0_max;
3413 int msb = 1;
3414 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3416 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3417 msb = 0;
3418 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3419 msb = -1;
3422 if (msb == 0 && d >= 0)
3424 /* q = t3; */
3425 q = t3;
3426 pattern_stmt = def_stmt;
3428 else
3430 /* t4 = oprnd0 >> (prec - 1);
3431 or if we know from VRP that oprnd0 >= 0
3432 t4 = 0;
3433 or if we know from VRP that oprnd0 < 0
3434 t4 = -1; */
3435 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3436 t4 = vect_recog_temp_ssa_var (itype, NULL);
3437 if (msb != 1)
3438 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3439 build_int_cst (itype, msb));
3440 else
3441 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3442 build_int_cst (itype, prec - 1));
3443 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3445 /* q = t3 - t4; or q = t4 - t3; */
3446 q = vect_recog_temp_ssa_var (itype, NULL);
3447 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3448 d < 0 ? t3 : t4);
3452 if (rhs_code == TRUNC_MOD_EXPR)
3454 tree r, t1;
3456 /* We divided. Now finish by:
3457 t1 = q * oprnd1;
3458 r = oprnd0 - t1; */
3459 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3461 t1 = vect_recog_temp_ssa_var (itype, NULL);
3462 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3463 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3465 r = vect_recog_temp_ssa_var (itype, NULL);
3466 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3469 /* Pattern detected. */
3470 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3472 *type_out = vectype;
3473 return pattern_stmt;
3476 /* Function vect_recog_mixed_size_cond_pattern
3478 Try to find the following pattern:
3480 type x_t, y_t;
3481 TYPE a_T, b_T, c_T;
3482 loop:
3483 S1 a_T = x_t CMP y_t ? b_T : c_T;
3485 where type 'TYPE' is an integral type which has a different size
3486 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3487 than 'type', the constants need to fit into an integer type
3488 with the same width as 'type') or results of conversion from 'type'.
3490 Input:
3492 * STMT_VINFO: The stmt from which the pattern search begins.
3494 Output:
3496 * TYPE_OUT: The type of the output of this pattern.
3498 * Return value: A new stmt that will be used to replace the pattern.
3499 Additionally a def_stmt is added.
3501 a_it = x_t CMP y_t ? b_it : c_it;
3502 a_T = (TYPE) a_it; */
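/* For example, with x and y arrays of short and a an array of int,
   a loop body such as

     a[i] = x[i] < y[i] ? -1 : 7;

   is handled (when the target cannot perform the mixed-size
   VEC_COND_EXPR directly) by doing the COND_EXPR in a 16-bit
   intermediate type and widening only the selected result:

     a_it = x[i] < y[i] ? -1 : 7;
     a[i] = (int) a_it;  */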
3504 static gimple *
3505 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3506 stmt_vec_info stmt_vinfo, tree *type_out)
3508 gimple *last_stmt = stmt_vinfo->stmt;
3509 tree cond_expr, then_clause, else_clause;
3510 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3511 gimple *pattern_stmt, *def_stmt;
3512 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3513 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3514 bool promotion;
3515 tree comp_scalar_type;
3517 if (!is_gimple_assign (last_stmt)
3518 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3519 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3520 return NULL;
3522 cond_expr = gimple_assign_rhs1 (last_stmt);
3523 then_clause = gimple_assign_rhs2 (last_stmt);
3524 else_clause = gimple_assign_rhs3 (last_stmt);
3526 if (!COMPARISON_CLASS_P (cond_expr))
3527 return NULL;
3529 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3530 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3531 if (comp_vectype == NULL_TREE)
3532 return NULL;
3534 type = gimple_expr_type (last_stmt);
3535 if (types_compatible_p (type, comp_scalar_type)
3536 || ((TREE_CODE (then_clause) != INTEGER_CST
3537 || TREE_CODE (else_clause) != INTEGER_CST)
3538 && !INTEGRAL_TYPE_P (comp_scalar_type))
3539 || !INTEGRAL_TYPE_P (type))
3540 return NULL;
3542 if ((TREE_CODE (then_clause) != INTEGER_CST
3543 && !type_conversion_p (vinfo, then_clause, false,
3544 &orig_type0, &def_stmt0, &promotion))
3545 || (TREE_CODE (else_clause) != INTEGER_CST
3546 && !type_conversion_p (vinfo, else_clause, false,
3547 &orig_type1, &def_stmt1, &promotion)))
3548 return NULL;
3550 if (orig_type0 && orig_type1
3551 && !types_compatible_p (orig_type0, orig_type1))
3552 return NULL;
3554 if (orig_type0)
3556 if (!types_compatible_p (orig_type0, comp_scalar_type))
3557 return NULL;
3558 then_clause = gimple_assign_rhs1 (def_stmt0);
3559 itype = orig_type0;
3562 if (orig_type1)
3564 if (!types_compatible_p (orig_type1, comp_scalar_type))
3565 return NULL;
3566 else_clause = gimple_assign_rhs1 (def_stmt1);
3567 itype = orig_type1;
3571 HOST_WIDE_INT cmp_mode_size
3572 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3574 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3575 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3576 return NULL;
3578 vectype = get_vectype_for_scalar_type (vinfo, type);
3579 if (vectype == NULL_TREE)
3580 return NULL;
3582 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3583 return NULL;
3585 if (itype == NULL_TREE)
3586 itype = build_nonstandard_integer_type (cmp_mode_size,
3587 TYPE_UNSIGNED (type));
3589 if (itype == NULL_TREE
3590 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3591 return NULL;
3593 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3594 if (vecitype == NULL_TREE)
3595 return NULL;
3597 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3598 return NULL;
3600 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3602 if ((TREE_CODE (then_clause) == INTEGER_CST
3603 && !int_fits_type_p (then_clause, itype))
3604 || (TREE_CODE (else_clause) == INTEGER_CST
3605 && !int_fits_type_p (else_clause, itype)))
3606 return NULL;
3609 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3610 COND_EXPR, unshare_expr (cond_expr),
3611 fold_convert (itype, then_clause),
3612 fold_convert (itype, else_clause));
3613 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3614 NOP_EXPR, gimple_assign_lhs (def_stmt));
3616 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3617 *type_out = vectype;
3619 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3621 return pattern_stmt;
3625 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3626 true if bool VAR can and should be optimized that way. Assume it shouldn't
3627 in case it's a result of a comparison which can be directly vectorized into
3628 a vector comparison. Fills in STMTS with all stmts visited during the
3629 walk. */
3631 static bool
3632 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3634 tree rhs1;
3635 enum tree_code rhs_code;
3637 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3638 if (!def_stmt_info)
3639 return false;
3641 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3642 if (!def_stmt)
3643 return false;
3645 if (stmts.contains (def_stmt))
3646 return true;
3648 rhs1 = gimple_assign_rhs1 (def_stmt);
3649 rhs_code = gimple_assign_rhs_code (def_stmt);
3650 switch (rhs_code)
3652 case SSA_NAME:
3653 if (! check_bool_pattern (rhs1, vinfo, stmts))
3654 return false;
3655 break;
3657 CASE_CONVERT:
3658 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3659 return false;
3660 if (! check_bool_pattern (rhs1, vinfo, stmts))
3661 return false;
3662 break;
3664 case BIT_NOT_EXPR:
3665 if (! check_bool_pattern (rhs1, vinfo, stmts))
3666 return false;
3667 break;
3669 case BIT_AND_EXPR:
3670 case BIT_IOR_EXPR:
3671 case BIT_XOR_EXPR:
3672 if (! check_bool_pattern (rhs1, vinfo, stmts)
3673 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3674 return false;
3675 break;
3677 default:
3678 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3680 tree vecitype, comp_vectype;
3682 /* If the comparison can throw, then is_gimple_condexpr will be
3683 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3684 if (stmt_could_throw_p (cfun, def_stmt))
3685 return false;
3687 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3688 if (comp_vectype == NULL_TREE)
3689 return false;
3691 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3692 TREE_TYPE (rhs1));
3693 if (mask_type
3694 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3695 return false;
3697 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3699 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3700 tree itype
3701 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3702 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3703 if (vecitype == NULL_TREE)
3704 return false;
3706 else
3707 vecitype = comp_vectype;
3708 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3709 return false;
3711 else
3712 return false;
3713 break;
3716 bool res = stmts.add (def_stmt);
3717 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3718 gcc_assert (!res);
3720 return true;
3724 /* Helper function of adjust_bool_pattern. Cast VAR to TYPE by building
3725 a new conversion statement and appending it to STMT_INFO's
3726 pattern definition sequence. Return the result of the cast. */
3728 static tree
3729 adjust_bool_pattern_cast (vec_info *vinfo,
3730 tree type, tree var, stmt_vec_info stmt_info)
3732 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3733 NOP_EXPR, var);
3734 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
3735 get_vectype_for_scalar_type (vinfo, type));
3736 return gimple_assign_lhs (cast_stmt);
3739 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3740 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3741 type, OUT_TYPE is the desired final integer type of the whole pattern.
3742 STMT_INFO is the info of the pattern root and is where pattern stmts should
3743 be associated with. DEFS is a map of pattern defs. */
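/* For example, a bool defined as  b_b = x_4 < y_5  with 32-bit
   comparison operands is rebuilt here as

     b_it = x_4 < y_5 ? 1 : 0;

   computed in a 32-bit unsigned type, so that it has the same number
   of vector elements as the comparison operands.  */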
3745 static void
3746 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
3747 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3749 gimple *stmt = SSA_NAME_DEF_STMT (var);
3750 enum tree_code rhs_code, def_rhs_code;
3751 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3752 location_t loc;
3753 gimple *pattern_stmt, *def_stmt;
3754 tree trueval = NULL_TREE;
3756 rhs1 = gimple_assign_rhs1 (stmt);
3757 rhs2 = gimple_assign_rhs2 (stmt);
3758 rhs_code = gimple_assign_rhs_code (stmt);
3759 loc = gimple_location (stmt);
3760 switch (rhs_code)
3762 case SSA_NAME:
3763 CASE_CONVERT:
3764 irhs1 = *defs.get (rhs1);
3765 itype = TREE_TYPE (irhs1);
3766 pattern_stmt
3767 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3768 SSA_NAME, irhs1);
3769 break;
3771 case BIT_NOT_EXPR:
3772 irhs1 = *defs.get (rhs1);
3773 itype = TREE_TYPE (irhs1);
3774 pattern_stmt
3775 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3776 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3777 break;
3779 case BIT_AND_EXPR:
3780 /* Try to optimize x = y & (a < b ? 1 : 0); into
3781 x = (a < b ? y : 0);
3783 E.g. for:
3784 bool a_b, b_b, c_b;
3785 TYPE d_T;
3787 S1 a_b = x1 CMP1 y1;
3788 S2 b_b = x2 CMP2 y2;
3789 S3 c_b = a_b & b_b;
3790 S4 d_T = (TYPE) c_b;
3792 we would normally emit:
3794 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3795 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3796 S3' c_T = a_T & b_T;
3797 S4' d_T = c_T;
3799 but we can save one stmt by using the
3800 result of one of the COND_EXPRs in the other COND_EXPR and leave
3801 BIT_AND_EXPR stmt out:
3803 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3804 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3805 S4' d_T = c_T;
3807 At least when VEC_COND_EXPR is implemented using masks
3808 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3809 computes the comparison mask and ANDs it, in one case with an
3810 all-ones vector, in the other case with a vector register.
3811 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3812 often more expensive. */
3813 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3814 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3815 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3817 irhs1 = *defs.get (rhs1);
3818 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3819 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3820 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3822 rhs_code = def_rhs_code;
3823 rhs1 = def_rhs1;
3824 rhs2 = gimple_assign_rhs2 (def_stmt);
3825 trueval = irhs1;
3826 goto do_compare;
3828 else
3829 irhs2 = *defs.get (rhs2);
3830 goto and_ior_xor;
3832 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3833 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3834 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3836 irhs2 = *defs.get (rhs2);
3837 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3838 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3839 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3841 rhs_code = def_rhs_code;
3842 rhs1 = def_rhs1;
3843 rhs2 = gimple_assign_rhs2 (def_stmt);
3844 trueval = irhs2;
3845 goto do_compare;
3847 else
3848 irhs1 = *defs.get (rhs1);
3849 goto and_ior_xor;
3851 /* FALLTHRU */
3852 case BIT_IOR_EXPR:
3853 case BIT_XOR_EXPR:
3854 irhs1 = *defs.get (rhs1);
3855 irhs2 = *defs.get (rhs2);
3856 and_ior_xor:
3857 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3858 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3860 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3861 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3862 int out_prec = TYPE_PRECISION (out_type);
3863 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3864 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
3865 stmt_info);
3866 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3867 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
3868 stmt_info);
3869 else
3871 irhs1 = adjust_bool_pattern_cast (vinfo,
3872 out_type, irhs1, stmt_info);
3873 irhs2 = adjust_bool_pattern_cast (vinfo,
3874 out_type, irhs2, stmt_info);
3877 itype = TREE_TYPE (irhs1);
3878 pattern_stmt
3879 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3880 rhs_code, irhs1, irhs2);
3881 break;
3883 default:
3884 do_compare:
3885 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3886 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3887 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3888 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3889 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3891 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3892 itype
3893 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3895 else
3896 itype = TREE_TYPE (rhs1);
3897 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3898 if (trueval == NULL_TREE)
3899 trueval = build_int_cst (itype, 1);
3900 else
3901 gcc_checking_assert (useless_type_conversion_p (itype,
3902 TREE_TYPE (trueval)));
3903 pattern_stmt
3904 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3905 COND_EXPR, cond_expr, trueval,
3906 build_int_cst (itype, 0));
3907 break;
3910 gimple_set_location (pattern_stmt, loc);
3911 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
3912 get_vectype_for_scalar_type (vinfo, itype));
3913 defs.put (var, gimple_assign_lhs (pattern_stmt));
3916 /* Comparison function to qsort a vector of gimple stmts by UID. */
3918 static int
3919 sort_after_uid (const void *p1, const void *p2)
3921 const gimple *stmt1 = *(const gimple * const *)p1;
3922 const gimple *stmt2 = *(const gimple * const *)p2;
3923 return gimple_uid (stmt1) - gimple_uid (stmt2);
3926 /* Create pattern stmts for all stmts participating in the bool pattern
3927 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3928 OUT_TYPE. Return the def of the pattern root. */
3930 static tree
3931 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
3932 tree out_type, stmt_vec_info stmt_info)
3934 /* Gather original stmts in the bool pattern in their order of appearance
3935 in the IL. */
3936 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3937 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3938 i != bool_stmt_set.end (); ++i)
3939 bool_stmts.quick_push (*i);
3940 bool_stmts.qsort (sort_after_uid);
3942 /* Now process them in that order, producing pattern stmts. */
3943 hash_map <tree, tree> defs;
3944 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3945 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
3946 out_type, stmt_info, defs);
3948 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3949 gimple *pattern_stmt
3950 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3951 return gimple_assign_lhs (pattern_stmt);
3954 /* Return the proper type for converting bool VAR into
3955 an integer value or NULL_TREE if no such type exists.
3956 The type is chosen so that the converted value has the
3957 same number of elements as VAR's vector type. */
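/* E.g. a boolean whose recorded mask_precision is 32 is converted via
   a 32-bit unsigned integer type, keeping one scalar element per lane
   of the corresponding mask vector.  */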
3959 static tree
3960 integer_type_for_mask (tree var, vec_info *vinfo)
3962 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3963 return NULL_TREE;
3965 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3966 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
3967 return NULL_TREE;
3969 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
3972 /* Function vect_recog_bool_pattern
3974 Try to find pattern like following:
3976 bool a_b, b_b, c_b, d_b, e_b;
3977 TYPE f_T;
3978 loop:
3979 S1 a_b = x1 CMP1 y1;
3980 S2 b_b = x2 CMP2 y2;
3981 S3 c_b = a_b & b_b;
3982 S4 d_b = x3 CMP3 y3;
3983 S5 e_b = c_b | d_b;
3984 S6 f_T = (TYPE) e_b;
3986 where type 'TYPE' is an integral type. Or a similar pattern
3987 ending in
3989 S6 f_Y = e_b ? r_Y : s_Y;
3991 as results from if-conversion of a complex condition.
3993 Input:
3995 * STMT_VINFO: The stmt at the end from which the pattern
3996 search begins, i.e. cast of a bool to
3997 an integer type.
3999 Output:
4001 * TYPE_OUT: The type of the output of this pattern.
4003 * Return value: A new stmt that will be used to replace the pattern.
4005 Assuming size of TYPE is the same as size of all comparisons
4006 (otherwise some casts would be added where needed), for the above
4007 sequence we create the following related pattern stmts:
4008 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4009 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4010 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4011 S5' e_T = c_T | d_T;
4012 S6' f_T = e_T;
4014 Instead of the above S3' we could emit:
4015 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4016 S3' c_T = a_T | b_T;
4017 but the above is more efficient. */
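/* The S1..S6 shape above typically corresponds to if-converted source
   along the lines of

     _Bool c_b = (x1 < y1) & (x2 < y2);
     _Bool e_b = c_b | (x3 < y3);
     f[i] = (int) e_b;

   after which the whole chain is rewritten to use integer-typed
   COND_EXPRs as shown.  */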
4019 static gimple *
4020 vect_recog_bool_pattern (vec_info *vinfo,
4021 stmt_vec_info stmt_vinfo, tree *type_out)
4023 gimple *last_stmt = stmt_vinfo->stmt;
4024 enum tree_code rhs_code;
4025 tree var, lhs, rhs, vectype;
4026 gimple *pattern_stmt;
4028 if (!is_gimple_assign (last_stmt))
4029 return NULL;
4031 var = gimple_assign_rhs1 (last_stmt);
4032 lhs = gimple_assign_lhs (last_stmt);
4033 rhs_code = gimple_assign_rhs_code (last_stmt);
4035 if (rhs_code == VIEW_CONVERT_EXPR)
4036 var = TREE_OPERAND (var, 0);
4038 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4039 return NULL;
4041 hash_set<gimple *> bool_stmts;
4043 if (CONVERT_EXPR_CODE_P (rhs_code)
4044 || rhs_code == VIEW_CONVERT_EXPR)
4046 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4047 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
4048 return NULL;
4049 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4050 if (vectype == NULL_TREE)
4051 return NULL;
4053 if (check_bool_pattern (var, vinfo, bool_stmts))
4055 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4056 TREE_TYPE (lhs), stmt_vinfo);
4057 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4058 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4059 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4060 else
4061 pattern_stmt
4062 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4064 else
4066 tree type = integer_type_for_mask (var, vinfo);
4067 tree cst0, cst1, tmp;
4069 if (!type)
4070 return NULL;
4072 /* We may be able to use the cond directly with a narrowed type,
4073 avoiding multiple cond exprs followed by result packing, and
4074 instead perform a single cond with a packed mask. In the
4075 widening case it is better to do the cond first and then
4076 extract the results. */
4077 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4078 type = TREE_TYPE (lhs);
4080 cst0 = build_int_cst (type, 0);
4081 cst1 = build_int_cst (type, 1);
4082 tmp = vect_recog_temp_ssa_var (type, NULL);
4083 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4085 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4087 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4088 append_pattern_def_seq (vinfo, stmt_vinfo,
4089 pattern_stmt, new_vectype);
4091 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4092 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4096 *type_out = vectype;
4097 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4099 return pattern_stmt;
4101 else if (rhs_code == COND_EXPR
4102 && TREE_CODE (var) == SSA_NAME)
4104 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4105 if (vectype == NULL_TREE)
4106 return NULL;
4108 /* Build a scalar type for the boolean result that when
4109 vectorized matches the vector type of the result in
4110 size and number of elements. */
4111 unsigned prec
4112 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4113 TYPE_VECTOR_SUBPARTS (vectype));
4115 tree type
4116 = build_nonstandard_integer_type (prec,
4117 TYPE_UNSIGNED (TREE_TYPE (var)));
4118 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4119 return NULL;
4121 if (!check_bool_pattern (var, vinfo, bool_stmts))
4122 return NULL;
4124 rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4126 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4127 pattern_stmt
4128 = gimple_build_assign (lhs, COND_EXPR,
4129 build2 (NE_EXPR, boolean_type_node,
4130 rhs, build_int_cst (type, 0)),
4131 gimple_assign_rhs2 (last_stmt),
4132 gimple_assign_rhs3 (last_stmt));
4133 *type_out = vectype;
4134 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4136 return pattern_stmt;
4138 else if (rhs_code == SSA_NAME
4139 && STMT_VINFO_DATA_REF (stmt_vinfo))
4141 stmt_vec_info pattern_stmt_info;
4142 tree nunits_vectype;
4143 if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4144 &nunits_vectype)
4145 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4146 return NULL;
4148 if (check_bool_pattern (var, vinfo, bool_stmts))
4149 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4150 TREE_TYPE (vectype), stmt_vinfo);
4151 else
4153 tree type = integer_type_for_mask (var, vinfo);
4154 tree cst0, cst1, new_vectype;
4156 if (!type)
4157 return NULL;
4159 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4160 type = TREE_TYPE (vectype);
4162 cst0 = build_int_cst (type, 0);
4163 cst1 = build_int_cst (type, 1);
4164 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4166 rhs = vect_recog_temp_ssa_var (type, NULL);
4167 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4168 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4171 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4172 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4174 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4175 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4176 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4177 rhs = rhs2;
4179 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4180 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4181 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4182 *type_out = vectype;
4183 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4185 return pattern_stmt;
4187 else
4188 return NULL;
4192 /* A helper for vect_recog_mask_conversion_pattern. Build
4193 conversion of MASK to a type suitable for masking VECTYPE.
4194 Built statement gets required vectype and is appended to
4195 a pattern sequence of STMT_VINFO.
4197 Return converted mask. */
4199 static tree
4200 build_mask_conversion (vec_info *vinfo,
4201 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4203 gimple *stmt;
4204 tree masktype, tmp;
4206 masktype = truth_type_for (vectype);
4207 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4208 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4209 append_pattern_def_seq (vinfo, stmt_vinfo,
4210 stmt, masktype, TREE_TYPE (vectype));
4212 return tmp;
4216 /* Function vect_recog_mask_conversion_pattern
4218 Try to find statements which require boolean type
4219 conversion. Additional conversion statements are
4220 added to handle such cases. For example:
4222 bool m_1, m_2, m_3;
4223 int i_4, i_5;
4224 double d_6, d_7;
4225 char c_1, c_2, c_3;
4227 S1 m_1 = i_4 > i_5;
4228 S2 m_2 = d_6 < d_7;
4229 S3 m_3 = m_1 & m_2;
4230 S4 c_1 = m_3 ? c_2 : c_3;
4232 Will be transformed into:
4234 S1 m_1 = i_4 > i_5;
4235 S2 m_2 = d_6 < d_7;
4236 S3'' m_2' = (_Bool[bitsize=32])m_2
4237 S3' m_3' = m_1 & m_2';
4238 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4239 S4' c_1' = m_3'' ? c_2 : c_3; */
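/* The same kind of conversion is applied to masked loads and stores;
   e.g. for an if-converted load such as

     _1 = d_6 < d_7;
     i_4 = .MASK_LOAD (ptr, align, _1);

   where the mask _1 has double-sized elements but the load produces
   int-sized elements, a mask conversion statement is inserted before
   the call.  */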
4241 static gimple *
4242 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4243 stmt_vec_info stmt_vinfo, tree *type_out)
4245 gimple *last_stmt = stmt_vinfo->stmt;
4246 enum tree_code rhs_code;
4247 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4248 tree vectype1, vectype2;
4249 stmt_vec_info pattern_stmt_info;
4250 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4251 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4253 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4254 if (is_gimple_call (last_stmt)
4255 && gimple_call_internal_p (last_stmt))
4257 gcall *pattern_stmt;
4259 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4260 int mask_argno = internal_fn_mask_index (ifn);
4261 if (mask_argno < 0)
4262 return NULL;
4264 bool store_p = internal_store_fn_p (ifn);
4265 if (store_p)
4267 int rhs_index = internal_fn_stored_value_index (ifn);
4268 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4269 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4271 else
4273 lhs = gimple_call_lhs (last_stmt);
4274 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4277 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4278 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4279 if (!mask_arg_type)
4280 return NULL;
4281 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4283 if (!vectype1 || !vectype2
4284 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4285 TYPE_VECTOR_SUBPARTS (vectype2)))
4286 return NULL;
4288 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4290 auto_vec<tree, 8> args;
4291 unsigned int nargs = gimple_call_num_args (last_stmt);
4292 args.safe_grow (nargs, true);
4293 for (unsigned int i = 0; i < nargs; ++i)
4294 args[i] = ((int) i == mask_argno
4295 ? tmp
4296 : gimple_call_arg (last_stmt, i));
4297 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4299 if (!store_p)
4301 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4302 gimple_call_set_lhs (pattern_stmt, lhs);
4304 gimple_call_set_nothrow (pattern_stmt, true);
4306 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4307 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4308 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4310 *type_out = vectype1;
4311 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4313 return pattern_stmt;
4316 if (!is_gimple_assign (last_stmt))
4317 return NULL;
4319 gimple *pattern_stmt;
4320 lhs = gimple_assign_lhs (last_stmt);
4321 rhs1 = gimple_assign_rhs1 (last_stmt);
4322 rhs_code = gimple_assign_rhs_code (last_stmt);
4324 /* Check for cond expression requiring mask conversion. */
4325 if (rhs_code == COND_EXPR)
4327 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4329 if (TREE_CODE (rhs1) == SSA_NAME)
4331 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4332 if (!rhs1_type)
4333 return NULL;
4335 else if (COMPARISON_CLASS_P (rhs1))
4337 /* Check whether we're comparing scalar booleans and (if so)
4338 whether a better mask type exists than the mask associated
4339 with boolean-sized elements. This avoids unnecessary packs
4340 and unpacks if the booleans are set from comparisons of
4341 wider types. E.g. in:
4343 int x1, x2, x3, x4, y1, y2;
4345 bool b1 = (x1 == x2);
4346 bool b2 = (x3 == x4);
4347 ... = b1 == b2 ? y1 : y2;
4349 it is better for b1 and b2 to use the mask type associated
4350 with int elements rather than bool (byte) elements. */
4351 rhs1_op0 = TREE_OPERAND (rhs1, 0);
4352 rhs1_op1 = TREE_OPERAND (rhs1, 1);
4353 if (!rhs1_op0 || !rhs1_op1)
4354 return NULL;
4355 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4356 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4358 if (!rhs1_op0_type)
4359 rhs1_type = TREE_TYPE (rhs1_op0);
4360 else if (!rhs1_op1_type)
4361 rhs1_type = TREE_TYPE (rhs1_op1);
4362 else if (TYPE_PRECISION (rhs1_op0_type)
4363 != TYPE_PRECISION (rhs1_op1_type))
4365 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4366 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4367 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4368 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4369 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4371 if (abs (tmp0) > abs (tmp1))
4372 rhs1_type = rhs1_op1_type;
4373 else
4374 rhs1_type = rhs1_op0_type;
4376 else
4377 rhs1_type = build_nonstandard_integer_type
4378 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4380 else
4381 rhs1_type = rhs1_op0_type;
4383 else
4384 return NULL;
4386 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4388 if (!vectype1 || !vectype2)
4389 return NULL;
4391 /* Continue if a conversion is needed. Also continue if we have
4392 a comparison whose vector type would normally be different from
4393 VECTYPE2 when considered in isolation. In that case we'll
4394 replace the comparison with an SSA name (so that we can record
4395 its vector type) and behave as though the comparison was an SSA
4396 name from the outset. */
4397 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4398 TYPE_VECTOR_SUBPARTS (vectype2))
4399 && !rhs1_op0_type
4400 && !rhs1_op1_type)
4401 return NULL;
4403 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4404 in place; vectorizable_condition can handle it there. This avoids
4405 unnecessary promotion stmts and an increased vectorization factor. */
4406 if (COMPARISON_CLASS_P (rhs1)
4407 && INTEGRAL_TYPE_P (rhs1_type)
4408 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4409 TYPE_VECTOR_SUBPARTS (vectype2)))
4411 enum vect_def_type dt;
4412 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4413 && dt == vect_external_def
4414 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4415 && (dt == vect_external_def
4416 || dt == vect_constant_def))
4418 tree wide_scalar_type = build_nonstandard_integer_type
4419 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4420 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4421 wide_scalar_type);
4422 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4423 return NULL;
4427 /* If rhs1 is a comparison we need to move it into a
4428 separate statement. */
4429 if (TREE_CODE (rhs1) != SSA_NAME)
4431 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4432 if (rhs1_op0_type
4433 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4434 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4435 vectype2, stmt_vinfo);
4436 if (rhs1_op1_type
4437 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4438 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4439 vectype2, stmt_vinfo);
4440 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4441 rhs1_op0, rhs1_op1);
4442 rhs1 = tmp;
4443 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4444 rhs1_type);
4447 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4448 TYPE_VECTOR_SUBPARTS (vectype2)))
4449 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4450 else
4451 tmp = rhs1;
4453 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4454 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4455 gimple_assign_rhs2 (last_stmt),
4456 gimple_assign_rhs3 (last_stmt));
4458 *type_out = vectype1;
4459 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4461 return pattern_stmt;
4464 /* Now check for binary boolean operations requiring conversion for
4465 one of the operands. */
4466 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4467 return NULL;
4469 if (rhs_code != BIT_IOR_EXPR
4470 && rhs_code != BIT_XOR_EXPR
4471 && rhs_code != BIT_AND_EXPR
4472 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4473 return NULL;
4475 rhs2 = gimple_assign_rhs2 (last_stmt);
4477 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4478 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4480 if (!rhs1_type || !rhs2_type
4481 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4482 return NULL;
4484 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4486 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4487 if (!vectype1)
4488 return NULL;
4489 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4491 else
4493 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4494 if (!vectype1)
4495 return NULL;
4496 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4499 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4500 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4502 *type_out = vectype1;
4503 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4505 return pattern_stmt;
4508 /* STMT_INFO is a load or store. If the load or store is conditional, return
4509 the boolean condition under which it occurs, otherwise return null. */
4511 static tree
4512 vect_get_load_store_mask (stmt_vec_info stmt_info)
4514 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4516 gcc_assert (gimple_assign_single_p (def_assign));
4517 return NULL_TREE;
4520 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4522 internal_fn ifn = gimple_call_internal_fn (def_call);
4523 int mask_index = internal_fn_mask_index (ifn);
4524 return gimple_call_arg (def_call, mask_index);
4527 gcc_unreachable ();
4530 /* Return MASK if MASK is suitable for masking an operation on vectors
4531 of type VECTYPE, otherwise convert it into such a form and return
4532 the result. Associate any conversion statements with STMT_INFO's
4533 pattern. */
4535 static tree
4536 vect_convert_mask_for_vectype (tree mask, tree vectype,
4537 stmt_vec_info stmt_info, vec_info *vinfo)
4539 tree mask_type = integer_type_for_mask (mask, vinfo);
4540 if (mask_type)
4542 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4543 if (mask_vectype
4544 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4545 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4546 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4548 return mask;
4551 /* Return the equivalent of:
4553 fold_convert (TYPE, VALUE)
4555 with the expectation that the operation will be vectorized.
4556 If new statements are needed, add them as pattern statements
4557 to STMT_INFO. */
4559 static tree
4560 vect_add_conversion_to_pattern (vec_info *vinfo,
4561 tree type, tree value, stmt_vec_info stmt_info)
4563 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4564 return value;
4566 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4567 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4568 append_pattern_def_seq (vinfo, stmt_info, conversion,
4569 get_vectype_for_scalar_type (vinfo, type));
4570 return new_value;
4573 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4574 internal function. Return the final statement on success and set
4575 *TYPE_OUT to the vector type being loaded or stored.
4577 This function only handles gathers and scatters that were recognized
4578 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
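/* For example, assuming the target provides IFN_MASK_GATHER_LOAD,
   an if-converted gather such as

     if (m[i])
       res[i] = data[idx[i]];

   is rewritten into a call of the form

     res_1 = .MASK_GATHER_LOAD (base, offset, scale, 0, mask);  */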
4580 static gimple *
4581 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4582 stmt_vec_info stmt_info, tree *type_out)
4584 /* Currently we only support this for loop vectorization. */
4585 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4586 if (!loop_vinfo)
4587 return NULL;
4589 /* Make sure that we're looking at a gather load or scatter store. */
4590 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4591 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4592 return NULL;
4594 /* Get the boolean that controls whether the load or store happens.
4595 This is null if the operation is unconditional. */
4596 tree mask = vect_get_load_store_mask (stmt_info);
4598 /* Make sure that the target supports an appropriate internal
4599 function for the gather/scatter operation. */
4600 gather_scatter_info gs_info;
4601 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4602 || gs_info.decl)
4603 return NULL;
4605 /* Convert the mask to the right form. */
4606 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4607 gs_info.element_type);
4608 if (mask)
4609 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4610 loop_vinfo);
4612 /* Get the invariant base and non-invariant offset, converting the
4613 latter to the same width as the vector elements. */
4614 tree base = gs_info.base;
4615 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4616 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4617 gs_info.offset, stmt_info);
4619 /* Build the new pattern statement. */
4620 tree scale = size_int (gs_info.scale);
4621 gcall *pattern_stmt;
4622 if (DR_IS_READ (dr))
4624 tree zero = build_zero_cst (gs_info.element_type);
4625 if (mask != NULL)
4626 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4627 offset, scale, zero, mask);
4628 else
4629 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4630 offset, scale, zero);
4631 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4632 gimple_call_set_lhs (pattern_stmt, load_lhs);
4634 else
4636 tree rhs = vect_get_store_rhs (stmt_info);
4637 if (mask != NULL)
4638 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4639 base, offset, scale, rhs,
4640 mask);
4641 else
4642 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4643 base, offset, scale, rhs);
4645 gimple_call_set_nothrow (pattern_stmt, true);
4647 /* Copy across relevant vectorization info and associate DR with the
4648 new pattern statement instead of the original statement. */
4649 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4650 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4652 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4653 *type_out = vectype;
4654 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4656 return pattern_stmt;
4659 /* Return true if TYPE is a non-boolean integer type. These are the types
4660 that we want to consider for narrowing. */
4662 static bool
4663 vect_narrowable_type_p (tree type)
4665 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4668 /* Return true if the operation given by CODE can be truncated to N bits
4669 when only N bits of the output are needed. This is only true if bit N+1
4670 of the inputs has no effect on the low N bits of the result. */
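/* For instance, PLUS_EXPR qualifies because carries only propagate
   towards more significant bits, whereas RSHIFT_EXPR does not:
   in (unsigned char) (x >> 1) bit 7 of the result is bit 8 of x,
   so truncating the input to 8 bits would change the answer.  */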
4672 static bool
4673 vect_truncatable_operation_p (tree_code code)
4675 switch (code)
4677 case PLUS_EXPR:
4678 case MINUS_EXPR:
4679 case MULT_EXPR:
4680 case BIT_AND_EXPR:
4681 case BIT_IOR_EXPR:
4682 case BIT_XOR_EXPR:
4683 case COND_EXPR:
4684 return true;
4686 default:
4687 return false;
4691 /* Record that STMT_INFO could be changed from operating on TYPE to
4692 operating on a type with the precision and sign given by PRECISION
4693 and SIGN respectively. PRECISION is an arbitrary bit precision;
4694 it might not be a whole number of bytes. */
4696 static void
4697 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4698 unsigned int precision, signop sign)
4700 /* Round the precision up to a whole number of bytes. */
4701 precision = vect_element_precision (precision);
4702 if (precision < TYPE_PRECISION (type)
4703 && (!stmt_info->operation_precision
4704 || stmt_info->operation_precision > precision))
4706 stmt_info->operation_precision = precision;
4707 stmt_info->operation_sign = sign;
4711 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4712 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4713 is an arbitrary bit precision; it might not be a whole number of bytes. */
4715 static void
4716 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4717 unsigned int min_input_precision)
4719 /* This operation in isolation only requires the inputs to have
4720 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4721 that MIN_INPUT_PRECISION is a natural precision for the chain
4722 as a whole. E.g. consider something like:
4724 unsigned short *x, *y;
4725 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4727 The right shift can be done on unsigned chars, and only requires the
4728 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4729 approach would mean turning a natural chain of single-vector unsigned
4730 short operations into one that truncates "*x" and then extends
4731 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4732 operation and one vector for each unsigned char operation.
4733 This would be a significant pessimization.
4735 Instead only propagate the maximum of this precision and the precision
4736 required by the users of the result. This means that we don't pessimize
4737 the case above but continue to optimize things like:
4739 unsigned char *y;
4740 unsigned short *x;
4741 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4743 Here we would truncate two vectors of *x to a single vector of
4744 unsigned chars and use single-vector unsigned char operations for
4745 everything else, rather than doing two unsigned short copies of
4746 "(*x & 0xf0) >> 4" and then truncating the result. */
4747 min_input_precision = MAX (min_input_precision,
4748 stmt_info->min_output_precision);
4750 if (min_input_precision < TYPE_PRECISION (type)
4751 && (!stmt_info->min_input_precision
4752 || stmt_info->min_input_precision > min_input_precision))
4753 stmt_info->min_input_precision = min_input_precision;
4756 /* Subroutine of vect_determine_min_output_precision. Return true if
4757 we can calculate a reduced number of output bits for STMT_INFO,
4758 whose result is LHS. */
4760 static bool
4761 vect_determine_min_output_precision_1 (vec_info *vinfo,
4762 stmt_vec_info stmt_info, tree lhs)
4764 /* Take the maximum precision required by users of the result. */
4765 unsigned int precision = 0;
4766 imm_use_iterator iter;
4767 use_operand_p use;
4768 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4770 gimple *use_stmt = USE_STMT (use);
4771 if (is_gimple_debug (use_stmt))
4772 continue;
4773 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4774 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4775 return false;
4776 /* The input precision recorded for COND_EXPRs applies only to the
4777 "then" and "else" values. */
4778 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4779 if (assign
4780 && gimple_assign_rhs_code (assign) == COND_EXPR
4781 && use->use != gimple_assign_rhs2_ptr (assign)
4782 && use->use != gimple_assign_rhs3_ptr (assign))
4783 return false;
4784 precision = MAX (precision, use_stmt_info->min_input_precision);
4787 if (dump_enabled_p ())
4788 dump_printf_loc (MSG_NOTE, vect_location,
4789 "only the low %d bits of %T are significant\n",
4790 precision, lhs);
4791 stmt_info->min_output_precision = precision;
4792 return true;
4795 /* Calculate min_output_precision for STMT_INFO. */
4797 static void
4798 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
4800 /* We're only interested in statements with a narrowable result. */
4801 tree lhs = gimple_get_lhs (stmt_info->stmt);
4802 if (!lhs
4803 || TREE_CODE (lhs) != SSA_NAME
4804 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4805 return;
4807 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
4808 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4811 /* Use range information to decide whether STMT (described by STMT_INFO)
4812 could be done in a narrower type. This is effectively a forward
4813 propagation, since it uses context-independent information that applies
4814 to all users of an SSA name. */
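/* For example, if range information shows that the result of

     int t = x & 0xff;

   lies in [0, 0xff], the AND can be recorded as being performable in
   8-bit unsigned arithmetic, and only the low 8 bits of its inputs
   are then required.  */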
4816 static void
4817 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4819 tree lhs = gimple_assign_lhs (stmt);
4820 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4821 return;
4823 tree type = TREE_TYPE (lhs);
4824 if (!vect_narrowable_type_p (type))
4825 return;
4827 /* First see whether we have any useful range information for the result. */
4828 unsigned int precision = TYPE_PRECISION (type);
4829 signop sign = TYPE_SIGN (type);
4830 wide_int min_value, max_value;
4831 if (!vect_get_range_info (lhs, &min_value, &max_value))
4832 return;
4834 tree_code code = gimple_assign_rhs_code (stmt);
4835 unsigned int nops = gimple_num_ops (stmt);
4837 if (!vect_truncatable_operation_p (code))
4838 /* Check that all relevant input operands are compatible, and update
4839 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4840 for (unsigned int i = 1; i < nops; ++i)
4842 tree op = gimple_op (stmt, i);
4843 if (TREE_CODE (op) == INTEGER_CST)
4845 /* Don't require the integer to have RHS_TYPE (which it might
4846 not for things like shift amounts, etc.), but do require it
4847 to fit the type. */
4848 if (!int_fits_type_p (op, type))
4849 return;
4851 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4852 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4854 else if (TREE_CODE (op) == SSA_NAME)
4856 /* Ignore codes that don't take uniform arguments. */
4857 if (!types_compatible_p (TREE_TYPE (op), type))
4858 return;
4860 wide_int op_min_value, op_max_value;
4861 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4862 return;
4864 min_value = wi::min (min_value, op_min_value, sign);
4865 max_value = wi::max (max_value, op_max_value, sign);
4867 else
4868 return;
4871 /* Try to switch signed types for unsigned types if we can.
4872 This is better for two reasons. First, unsigned ops tend
4873 to be cheaper than signed ops. Second, it means that we can
4874 handle things like:
4876 signed char c;
4877 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4879 as:
4881 signed char c;
4882 unsigned short res_1 = (unsigned short) c & 0xff00;
4883 int res = (int) res_1;
4885 where the intermediate result res_1 has unsigned rather than
4886 signed type. */
4887 if (sign == SIGNED && !wi::neg_p (min_value))
4888 sign = UNSIGNED;
4890 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4891 unsigned int precision1 = wi::min_precision (min_value, sign);
4892 unsigned int precision2 = wi::min_precision (max_value, sign);
4893 unsigned int value_precision = MAX (precision1, precision2);
4894 if (value_precision >= precision)
4895 return;
4897 if (dump_enabled_p ())
4898 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4899 " without loss of precision: %G",
4900 sign == SIGNED ? "signed" : "unsigned",
4901 value_precision, stmt);
4903 vect_set_operation_type (stmt_info, type, value_precision, sign);
4904 vect_set_min_input_precision (stmt_info, type, value_precision);
4907 /* Use information about the users of STMT's result to decide whether
4908 STMT (described by STMT_INFO) could be done in a narrower type.
4909 This is effectively a backward propagation. */
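/* For example, if only the low 8 bits of the result of  x >> 4  are
   needed, the shift itself must be done in at least 8 + 4 = 12 bits,
   which vect_set_operation_type rounds up to a whole number of
   bytes (16 bits here).  */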
4911 static void
4912 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4914 tree_code code = gimple_assign_rhs_code (stmt);
4915 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4916 tree type = TREE_TYPE (gimple_op (stmt, opno));
4917 if (!vect_narrowable_type_p (type))
4918 return;
4920 unsigned int precision = TYPE_PRECISION (type);
4921 unsigned int operation_precision, min_input_precision;
4922 switch (code)
4924 CASE_CONVERT:
4925 /* Only the bits that contribute to the output matter. Don't change
4926 the precision of the operation itself. */
4927 operation_precision = precision;
4928 min_input_precision = stmt_info->min_output_precision;
4929 break;
4931 case LSHIFT_EXPR:
4932 case RSHIFT_EXPR:
4934 tree shift = gimple_assign_rhs2 (stmt);
4935 if (TREE_CODE (shift) != INTEGER_CST
4936 || !wi::ltu_p (wi::to_widest (shift), precision))
4937 return;
4938 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4939 if (code == LSHIFT_EXPR)
4941 /* We need CONST_SHIFT fewer bits of the input. */
4942 operation_precision = stmt_info->min_output_precision;
4943 min_input_precision = (MAX (operation_precision, const_shift)
4944 - const_shift);
4946 else
4948 /* We need CONST_SHIFT extra bits to do the operation. */
4949 operation_precision = (stmt_info->min_output_precision
4950 + const_shift);
4951 min_input_precision = operation_precision;
4953 break;
4956 default:
4957 if (vect_truncatable_operation_p (code))
4959 /* Input bit N has no effect on output bits N-1 and lower. */
4960 operation_precision = stmt_info->min_output_precision;
4961 min_input_precision = operation_precision;
4962 break;
4964 return;
4967 if (operation_precision < precision)
4969 if (dump_enabled_p ())
4970 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4971 " without affecting users: %G",
4972 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4973 operation_precision, stmt);
4974 vect_set_operation_type (stmt_info, type, operation_precision,
4975 TYPE_SIGN (type));
4977 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4980 /* Return true if the statement described by STMT_INFO sets a boolean
4981 SSA_NAME and if we know how to vectorize this kind of statement using
4982 vector mask types. */
4984 static bool
4985 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
4987 tree lhs = gimple_get_lhs (stmt_info->stmt);
4988 if (!lhs
4989 || TREE_CODE (lhs) != SSA_NAME
4990 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4991 return false;
4993 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
4995 tree_code rhs_code = gimple_assign_rhs_code (assign);
4996 switch (rhs_code)
4998 CASE_CONVERT:
4999 case SSA_NAME:
5000 case BIT_NOT_EXPR:
5001 case BIT_IOR_EXPR:
5002 case BIT_XOR_EXPR:
5003 case BIT_AND_EXPR:
5004 return true;
5006 default:
5007 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5010 return false;
5013 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5014 a vector mask type instead of a normal vector type. Record the
5015 result in STMT_INFO->mask_precision. */
5017 static void
5018 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5020 if (!possible_vector_mask_operation_p (stmt_info)
5021 || stmt_info->mask_precision)
5022 return;
5024 auto_vec<stmt_vec_info, 32> worklist;
5025 worklist.quick_push (stmt_info);
5026 while (!worklist.is_empty ())
5028 stmt_info = worklist.last ();
5029 unsigned int orig_length = worklist.length ();
5031 /* If at least one boolean input uses a vector mask type,
5032 pick the mask type with the narrowest elements.
5034 ??? This is the traditional behavior. It should always produce
5035 the smallest number of operations, but isn't necessarily the
5036 optimal choice. For example, if we have:
5038 a = b & c
5040 where:
5042 - the user of a wants it to have a mask type for 16-bit elements (M16)
5043 - b also uses M16
5044 - c uses a mask type for 8-bit elements (M8)
5046 then picking M8 gives:
5048 - 1 M16->M8 pack for b
5049 - 1 M8 AND for a
5050 - 2 M8->M16 unpacks for the user of a
5052 whereas picking M16 would have given:
5054 - 2 M8->M16 unpacks for c
5055 - 2 M16 ANDs for a
5057 The number of operations is equal, but M16 would have given
5058 a shorter dependency chain and allowed more ILP. */
5059 unsigned int precision = ~0U;
5060 gassign *assign = as_a <gassign *> (stmt_info->stmt);
5061 unsigned int nops = gimple_num_ops (assign);
5062 for (unsigned int i = 1; i < nops; ++i)
5064 tree rhs = gimple_op (assign, i);
5065 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5066 continue;
5068 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5069 if (!def_stmt_info)
5070 /* Don't let external or constant operands influence the choice.
5071 We can convert them to whichever vector type we pick. */
5072 continue;
5074 if (def_stmt_info->mask_precision)
5076 if (precision > def_stmt_info->mask_precision)
5077 precision = def_stmt_info->mask_precision;
5079 else if (possible_vector_mask_operation_p (def_stmt_info))
5080 worklist.safe_push (def_stmt_info);
5083 /* Defer the choice if we need to visit operands first. */
5084 if (orig_length != worklist.length ())
5085 continue;
5087 /* If the statement compares two values that shouldn't use vector masks,
5088 try comparing the values as normal scalars instead. */
5089 tree_code rhs_code = gimple_assign_rhs_code (assign);
5090 if (precision == ~0U
5091 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5093 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5094 scalar_mode mode;
5095 tree vectype, mask_type;
5096 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5097 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5098 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5099 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5100 precision = GET_MODE_BITSIZE (mode);
5103 if (dump_enabled_p ())
5105 if (precision == ~0U)
5106 dump_printf_loc (MSG_NOTE, vect_location,
5107 "using normal nonmask vectors for %G",
5108 stmt_info->stmt);
5109 else
5110 dump_printf_loc (MSG_NOTE, vect_location,
5111 "using boolean precision %d for %G",
5112 precision, stmt_info->stmt);
5115 stmt_info->mask_precision = precision;
5116 worklist.pop ();
5120 /* Handle vect_determine_precisions for STMT_INFO, given that we
5121 have already done so for the users of its result. */
5123 void
5124 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5126 vect_determine_min_output_precision (vinfo, stmt_info);
5127 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5129 vect_determine_precisions_from_range (stmt_info, stmt);
5130 vect_determine_precisions_from_users (stmt_info, stmt);
5132 vect_determine_mask_precision (vinfo, stmt_info);
5135 /* Walk backwards through the vectorizable region to determine the
5136 values of these fields:
5138 - min_output_precision
5139 - min_input_precision
5140 - operation_precision
5141 - operation_sign. */
5143 void
5144 vect_determine_precisions (vec_info *vinfo)
5146 DUMP_VECT_SCOPE ("vect_determine_precisions");
5148 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5150 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5151 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5152 unsigned int nbbs = loop->num_nodes;
5154 for (unsigned int i = 0; i < nbbs; i++)
5156 basic_block bb = bbs[nbbs - i - 1];
5157 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5158 !gsi_end_p (si); gsi_prev (&si))
5159 if (!is_gimple_debug (gsi_stmt (si)))
5160 vect_determine_stmt_precisions
5161 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5164 else
5166 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5167 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5168 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5169 !gsi_end_p (gsi); gsi_prev (&gsi))
5171 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5172 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5173 vect_determine_stmt_precisions (vinfo, stmt_info);
5178 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5180 struct vect_recog_func
5182 vect_recog_func_ptr fn;
5183 const char *name;
5186 /* Note that ordering matters - the first pattern matching on a stmt is
5187 taken, which means the more complex one usually needs to precede the
5188 less complex ones (widen_sum only after dot_prod or sad for example). */
5189 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5190 { vect_recog_over_widening_pattern, "over_widening" },
5191 /* Must come after over_widening, which narrows the shift as much as
5192 possible beforehand. */
5193 { vect_recog_average_pattern, "average" },
5194 { vect_recog_mulhs_pattern, "mult_high" },
5195 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5196 { vect_recog_widen_mult_pattern, "widen_mult" },
5197 { vect_recog_dot_prod_pattern, "dot_prod" },
5198 { vect_recog_sad_pattern, "sad" },
5199 { vect_recog_widen_sum_pattern, "widen_sum" },
5200 { vect_recog_pow_pattern, "pow" },
5201 { vect_recog_widen_shift_pattern, "widen_shift" },
5202 { vect_recog_rotate_pattern, "rotate" },
5203 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5204 { vect_recog_divmod_pattern, "divmod" },
5205 { vect_recog_mult_pattern, "mult" },
5206 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5207 { vect_recog_bool_pattern, "bool" },
5208 /* This must come before mask conversion, and includes the parts
5209 of mask conversion that are needed for gather and scatter
5210 internal functions. */
5211 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5212 { vect_recog_mask_conversion_pattern, "mask_conversion" }
5215 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5217 /* Mark statements that are involved in a pattern. */
5219 static inline void
5220 vect_mark_pattern_stmts (vec_info *vinfo,
5221 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5222 tree pattern_vectype)
5224 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5225 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5227 gimple *orig_pattern_stmt = NULL;
5228 if (is_pattern_stmt_p (orig_stmt_info))
5230 /* We're replacing a statement in an existing pattern definition
5231 sequence. */
5232 orig_pattern_stmt = orig_stmt_info->stmt;
5233 if (dump_enabled_p ())
5234 dump_printf_loc (MSG_NOTE, vect_location,
5235 "replacing earlier pattern %G", orig_pattern_stmt);
5237 /* To keep the book-keeping simple, just swap the lhs of the
5238 old and new statements, so that the old one has a valid but
5239 unused lhs. */
5240 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5241 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5242 gimple_set_lhs (pattern_stmt, old_lhs);
5244 if (dump_enabled_p ())
5245 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5247 /* Switch to the statement that ORIG replaces. */
5248 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5250 /* We shouldn't be replacing the main pattern statement. */
5251 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5252 != orig_pattern_stmt);
5255 if (def_seq)
5256 for (gimple_stmt_iterator si = gsi_start (def_seq);
5257 !gsi_end_p (si); gsi_next (&si))
5259 if (dump_enabled_p ())
5260 dump_printf_loc (MSG_NOTE, vect_location,
5261 "extra pattern stmt: %G", gsi_stmt (si));
5262 stmt_vec_info pattern_stmt_info
5263 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5264 orig_stmt_info, pattern_vectype);
5265 /* Stmts in the def sequence are not vectorizable cycle or
5266 induction defs; instead they should all be vect_internal_def,
5267 feeding the main pattern stmt, which retains this def type. */
5268 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5271 if (orig_pattern_stmt)
5273 vect_init_pattern_stmt (vinfo, pattern_stmt,
5274 orig_stmt_info, pattern_vectype);
5276 /* Insert all the new pattern statements before the original one. */
5277 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5278 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5279 orig_def_seq);
5280 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5281 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5283 /* Remove the pattern statement that this new pattern replaces. */
5284 gsi_remove (&gsi, false);
5286 else
5287 vect_set_pattern_stmt (vinfo,
5288 pattern_stmt, orig_stmt_info, pattern_vectype);
5290 /* Transfer reduction path info to the pattern. */
5291 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5293 tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
5294 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
5295 /* Search the pattern def sequence and the main pattern stmt. Note
5296 that we may have inserted everything into a containing pattern def
5297 sequence, so the following is a bit awkward. */
5298 gimple_stmt_iterator si;
5299 gimple *s;
5300 if (def_seq)
5302 si = gsi_start (def_seq);
5303 s = gsi_stmt (si);
5304 gsi_next (&si);
5306 else
5308 si = gsi_none ();
5309 s = pattern_stmt;
5313 bool found = false;
5314 for (unsigned i = 1; i < gimple_num_ops (s); ++i)
5315 if (gimple_op (s, i) == lookfor)
5317 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
5318 lookfor = gimple_get_lhs (s);
5319 found = true;
5320 break;
5322 if (s == pattern_stmt)
5324 if (!found && dump_enabled_p ())
5325 dump_printf_loc (MSG_NOTE, vect_location,
5326 "failed to update reduction index.\n");
5327 break;
5329 if (gsi_end_p (si))
5330 s = pattern_stmt;
5331 else
5333 s = gsi_stmt (si);
5334 if (s == pattern_stmt)
5335 /* Found the end inside a bigger pattern def seq. */
5336 si = gsi_none ();
5337 else
5338 gsi_next (&si);
5340 } while (1);
5344 /* Function vect_pattern_recog_1
5346 Input:
5347 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5348 computation pattern.
5349 STMT_INFO: A stmt from which the pattern search should start.
5351 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5352 a sequence of statements that has the same functionality and can be
5353 used to replace STMT_INFO. It returns the last statement in the sequence
5354 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5355 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5356 statement, having first checked that the target supports the new operation
5357 in that type.
5359 This function also does some bookkeeping, as explained in the documentation
5360 for vect_pattern_recog. */
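/* As an outline only (vect_recog_example_pattern is hypothetical, not a
   recognizer defined in this file), such a function typically has the shape

     static gimple *
     vect_recog_example_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
				 tree *type_out)
     {
       ... check that STMT_VINFO has the form being looked for and that
	   the target supports the replacement in a suitable vector type,
	   returning NULL otherwise ...
       ... build any helper statements and record them with
	   append_pattern_def_seq, then build the final pattern stmt ...
       *type_out = vectype;
       return pattern_stmt;
     }

   matching the vect_recog_func_ptr typedef above.  */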
5362 static void
5363 vect_pattern_recog_1 (vec_info *vinfo,
5364 vect_recog_func *recog_func, stmt_vec_info stmt_info)
5366 gimple *pattern_stmt;
5367 loop_vec_info loop_vinfo;
5368 tree pattern_vectype;
5370 /* If this statement has already been replaced with pattern statements,
5371 leave the original statement alone, since the first match wins.
5372 Instead try to match against the definition statements that feed
5373 the main pattern statement. */
5374 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5376 gimple_stmt_iterator gsi;
5377 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5378 !gsi_end_p (gsi); gsi_next (&gsi))
5379 vect_pattern_recog_1 (vinfo, recog_func,
5380 vinfo->lookup_stmt (gsi_stmt (gsi)));
5381 return;
5384 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5385 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5386 if (!pattern_stmt)
5388 /* Clear any half-formed pattern definition sequence. */
5389 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5390 return;
5393 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5394 gcc_assert (pattern_vectype);
5396 /* Found a vectorizable pattern. */
5397 if (dump_enabled_p ())
5398 dump_printf_loc (MSG_NOTE, vect_location,
5399 "%s pattern recognized: %G",
5400 recog_func->name, pattern_stmt);
5402 /* Mark the stmts that are involved in the pattern. */
5403 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5405 /* Patterns cannot be vectorized using SLP, because they change the order of
5406 computation. */
5407 if (loop_vinfo)
5409 unsigned ix, ix2;
5410 stmt_vec_info *elem_ptr;
5411 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5412 elem_ptr, *elem_ptr == stmt_info);
5417 /* Function vect_pattern_recog
5419 Input:
5420 VINFO - a vec_info (loop or basic-block) describing the code in which we want to look for
5421 computation idioms.
5423 Output - for each computation idiom that is detected we create a new stmt
5424 that provides the same functionality and that can be vectorized. We
5425 also record some information in the stmt_vec_info of the relevant
5426 stmts, as explained below:
5428 At the entry to this function we have the following stmts, with the
5429 following initial value in the STMT_VINFO fields:
5431 stmt in_pattern_p related_stmt vec_stmt
5432 S1: a_i = .... - - -
5433 S2: a_2 = ..use(a_i).. - - -
5434 S3: a_1 = ..use(a_2).. - - -
5435 S4: a_0 = ..use(a_1).. - - -
5436 S5: ... = ..use(a_0).. - - -
5438 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5439 represented by a single stmt. We then:
5440 - create a new stmt S6 equivalent to the pattern (the stmt is not
5441 inserted into the code)
5442 - fill in the STMT_VINFO fields as follows:
5444 in_pattern_p related_stmt vec_stmt
5445 S1: a_i = .... - - -
5446 S2: a_2 = ..use(a_i).. - - -
5447 S3: a_1 = ..use(a_2).. - - -
5448 S4: a_0 = ..use(a_1).. true S6 -
5449 '---> S6: a_new = .... - S4 -
5450 S5: ... = ..use(a_0).. - - -
5452 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5453 to each other through the RELATED_STMT field).
5455 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5456 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5457 remain irrelevant unless used by stmts other than S4.
5459 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5460 (because they are marked as irrelevant). It will vectorize S6, and record
5461 a pointer to the new vector stmt VS6 from S6 (as usual).
5462 S4 will be skipped, and S5 will be vectorized as usual:
5464 in_pattern_p related_stmt vec_stmt
5465 S1: a_i = .... - - -
5466 S2: a_2 = ..use(a_i).. - - -
5467 S3: a_1 = ..use(a_2).. - - -
5468 > VS6: va_new = .... - - -
5469 S4: a_0 = ..use(a_1).. true S6 VS6
5470 '---> S6: a_new = .... - S4 VS6
5471 > VS5: ... = ..vuse(va_new).. - - -
5472 S5: ... = ..use(a_0).. - - -
5474 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5475 elsewhere), and we'll end up with:
5477 VS6: va_new = ....
5478 VS5: ... = ..vuse(va_new)..
5480 In the case of more than one pattern statement, e.g., widen-mult with
5481 intermediate type:
5483 S1 a_t = ;
5484 S2 a_T = (TYPE) a_t;
5485 '--> S3: a_it = (interm_type) a_t;
5486 S4 prod_T = a_T * CONST;
5487 '--> S5: prod_T' = a_it w* CONST;
5489 there may be other users of a_T outside the pattern. In that case S2 will
5490 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5491 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5492 be recorded in S3. */
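/* As a purely illustrative example (the code below is hypothetical and not
   taken from any testcase): for unsigned char operands, a rounding average

     res[i] = (a[i] + b[i] + 1) >> 1;

   is computed by a short chain of widening, add and shift statements like
   S1-S4 above; the "average" recognizer replaces that chain with a single
   equivalent pattern statement (an internal-function average call), which
   is the statement that then gets analyzed and vectorized, while the
   original statements remain irrelevant unless they have other uses.  */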
5494 void
5495 vect_pattern_recog (vec_info *vinfo)
5497 class loop *loop;
5498 basic_block *bbs;
5499 unsigned int nbbs;
5500 gimple_stmt_iterator si;
5501 unsigned int i, j;
5503 vect_determine_precisions (vinfo);
5505 DUMP_VECT_SCOPE ("vect_pattern_recog");
5507 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5509 loop = LOOP_VINFO_LOOP (loop_vinfo);
5510 bbs = LOOP_VINFO_BBS (loop_vinfo);
5511 nbbs = loop->num_nodes;
5513 /* Scan through the loop stmts, applying the pattern recognition
5514 functions starting at each stmt visited: */
5515 for (i = 0; i < nbbs; i++)
5517 basic_block bb = bbs[i];
5518 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5520 if (is_gimple_debug (gsi_stmt (si)))
5521 continue;
5522 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5523 /* Scan over all generic vect_recog_xxx_pattern functions. */
5524 for (j = 0; j < NUM_PATTERNS; j++)
5525 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5526 stmt_info);
5530 else
5532 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5533 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5534 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
5535 !gsi_end_p (gsi); gsi_next (&gsi))
5537 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
5538 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5539 continue;
5541 /* Scan over all generic vect_recog_xxx_pattern functions. */
5542 for (j = 0; j < NUM_PATTERNS; j++)
5543 vect_pattern_recog_1 (vinfo,
5544 &vect_vect_recog_func_ptrs[j], stmt_info);
5548 /* After this no more add_stmt calls are allowed. */
5549 vinfo->stmt_vec_info_ro = true;