gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51 #include "gimple-range.h"
53 /* Return true if we have a useful VR_RANGE range for VAR, storing it
54 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
56 static bool
57 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
59 value_range vr;
60 get_range_query (cfun)->range_of_expr (vr, var);
61 if (vr.undefined_p ())
62 vr.set_varying (TREE_TYPE (var));
63 *min_value = wi::to_wide (vr.min ());
64 *max_value = wi::to_wide (vr.max ());
65 value_range_kind vr_type = vr.kind ();
66 wide_int nonzero = get_nonzero_bits (var);
67 signop sgn = TYPE_SIGN (TREE_TYPE (var));
68 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
69 nonzero, sgn) == VR_RANGE)
71 if (dump_enabled_p ())
73 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
74 dump_printf (MSG_NOTE, " has range [");
75 dump_hex (MSG_NOTE, *min_value);
76 dump_printf (MSG_NOTE, ", ");
77 dump_hex (MSG_NOTE, *max_value);
78 dump_printf (MSG_NOTE, "]\n");
80 return true;
82 else
84 if (dump_enabled_p ())
86 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
87 dump_printf (MSG_NOTE, " has no range info\n");
89 return false;
93 /* Report that we've found an instance of pattern PATTERN in
94 statement STMT. */
96 static void
97 vect_pattern_detected (const char *name, gimple *stmt)
99 if (dump_enabled_p ())
100 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
103 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
104 return the pattern statement's stmt_vec_info. Set its vector type to
105 VECTYPE if it doesn't have one already. */
107 static stmt_vec_info
108 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
109 stmt_vec_info orig_stmt_info, tree vectype)
111 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
112 if (pattern_stmt_info == NULL)
113 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
114 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
116 pattern_stmt_info->pattern_stmt_p = true;
117 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
118 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
119 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
120 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
122 gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
123 == vect_use_mask_type_p (orig_stmt_info));
124 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
125 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
127 return pattern_stmt_info;
130 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
131 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
132 have one already. */
134 static void
135 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
136 stmt_vec_info orig_stmt_info, tree vectype)
138 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
139 STMT_VINFO_RELATED_STMT (orig_stmt_info)
140 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
143 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
144 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
145 be different from the vector type of the final pattern statement.
146 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
147 from which it was derived. */
149 static inline void
150 append_pattern_def_seq (vec_info *vinfo,
151 stmt_vec_info stmt_info, gimple *new_stmt,
152 tree vectype = NULL_TREE,
153 tree scalar_type_for_mask = NULL_TREE)
155 gcc_assert (!scalar_type_for_mask
156 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
157 if (vectype)
159 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
160 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
161 if (scalar_type_for_mask)
162 new_stmt_info->mask_precision
163 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
165 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
166 new_stmt);
169 /* The caller wants to perform new operations on vect_external variable
170 VAR, so that the result of the operations would also be vect_external.
171 Return the edge on which the operations can be performed, if one exists.
172 Return null if the operations should instead be treated as part of
173 the pattern that needs them. */
175 static edge
176 vect_get_external_def_edge (vec_info *vinfo, tree var)
178 edge e = NULL;
179 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
181 e = loop_preheader_edge (loop_vinfo->loop);
182 if (!SSA_NAME_IS_DEFAULT_DEF (var))
184 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
185 if (bb == NULL
186 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
187 e = NULL;
190 return e;
193 /* Return true if the target supports a vector version of CODE,
194 where CODE is known to map to a direct optab. ITYPE specifies
195 the type of (some of) the scalar inputs and OTYPE specifies the
196 type of the scalar result.
 198    If CODE allows the inputs and outputs to have different types
199 (such as for WIDEN_SUM_EXPR), it is the input mode rather
200 than the output mode that determines the appropriate target pattern.
201 Operand 0 of the target pattern then specifies the mode that the output
202 must have.
204 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
205 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
206 is nonnull. */
208 static bool
209 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
210 tree itype, tree *vecotype_out,
211 tree *vecitype_out = NULL)
213 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
214 if (!vecitype)
215 return false;
217 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
218 if (!vecotype)
219 return false;
221 optab optab = optab_for_tree_code (code, vecitype, optab_default);
222 if (!optab)
223 return false;
225 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
226 if (icode == CODE_FOR_nothing
227 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
228 return false;
230 *vecotype_out = vecotype;
231 if (vecitype_out)
232 *vecitype_out = vecitype;
233 return true;
236 /* Round bit precision PRECISION up to a full element. */
238 static unsigned int
239 vect_element_precision (unsigned int precision)
241 precision = 1 << ceil_log2 (precision);
242 return MAX (precision, BITS_PER_UNIT);
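/* For illustration (added comment, not part of the original source): with
   BITS_PER_UNIT == 8, vect_element_precision (11) yields 16 and
   vect_element_precision (3) yields 8, i.e. the precision is rounded up to
   the next power of two and never drops below a full byte.  */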
245 /* If OP is defined by a statement that's being considered for vectorization,
246 return information about that statement, otherwise return NULL. */
248 static stmt_vec_info
249 vect_get_internal_def (vec_info *vinfo, tree op)
251 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
252 if (def_stmt_info
253 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
254 return def_stmt_info;
255 return NULL;
258 /* Check whether NAME, an ssa-name used in STMT_VINFO,
259 is a result of a type promotion, such that:
260 DEF_STMT: NAME = NOP (name0)
261 If CHECK_SIGN is TRUE, check that either both types are signed or both are
262 unsigned. */
264 static bool
265 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
266 tree *orig_type, gimple **def_stmt, bool *promotion)
268 tree type = TREE_TYPE (name);
269 tree oprnd0;
270 enum vect_def_type dt;
272 stmt_vec_info def_stmt_info;
273 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
274 return false;
276 if (dt != vect_internal_def
277 && dt != vect_external_def && dt != vect_constant_def)
278 return false;
280 if (!*def_stmt)
281 return false;
283 if (!is_gimple_assign (*def_stmt))
284 return false;
286 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
287 return false;
289 oprnd0 = gimple_assign_rhs1 (*def_stmt);
291 *orig_type = TREE_TYPE (oprnd0);
292 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
293 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
294 return false;
296 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
297 *promotion = true;
298 else
299 *promotion = false;
301 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
302 return false;
304 return true;
307 /* Holds information about an input operand after some sign changes
308 and type promotions have been peeled away. */
309 class vect_unpromoted_value {
310 public:
311 vect_unpromoted_value ();
313 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
315 /* The value obtained after peeling away zero or more casts. */
316 tree op;
318 /* The type of OP. */
319 tree type;
321 /* The definition type of OP. */
322 vect_def_type dt;
324 /* If OP is the result of peeling at least one cast, and if the cast
325 of OP itself is a vectorizable statement, CASTER identifies that
326 statement, otherwise it is null. */
327 stmt_vec_info caster;
330 inline vect_unpromoted_value::vect_unpromoted_value ()
331 : op (NULL_TREE),
332 type (NULL_TREE),
333 dt (vect_uninitialized_def),
334 caster (NULL)
338 /* Set the operand to OP_IN, its definition type to DT_IN, and the
339 statement that casts it to CASTER_IN. */
341 inline void
342 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
343 stmt_vec_info caster_in)
345 op = op_in;
346 type = TREE_TYPE (op);
347 dt = dt_in;
348 caster = caster_in;
351 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
352 to reach some vectorizable inner operand OP', continuing as long as it
353 is possible to convert OP' back to OP using a possible sign change
354 followed by a possible promotion P. Return this OP', or null if OP is
355 not a vectorizable SSA name. If there is a promotion P, describe its
356 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
357 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
358 have more than one user.
360 A successful return means that it is possible to go from OP' to OP
361 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
362 whereas the cast from UNPROM to OP might be a promotion, a sign
363 change, or a nop.
365 E.g. say we have:
367 signed short *ptr = ...;
368 signed short C = *ptr;
369 unsigned short B = (unsigned short) C; // sign change
370 signed int A = (signed int) B; // unsigned promotion
371 ...possible other uses of A...
372 unsigned int OP = (unsigned int) A; // sign change
374 In this case it's possible to go directly from C to OP using:
376 OP = (unsigned int) (unsigned short) C;
377 +------------+ +--------------+
378 promotion sign change
380 so OP' would be C. The input to the promotion is B, so UNPROM
381 would describe B. */
383 static tree
384 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
385 vect_unpromoted_value *unprom,
386 bool *single_use_p = NULL)
388 tree res = NULL_TREE;
389 tree op_type = TREE_TYPE (op);
390 unsigned int orig_precision = TYPE_PRECISION (op_type);
391 unsigned int min_precision = orig_precision;
392 stmt_vec_info caster = NULL;
393 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
395 /* See whether OP is simple enough to vectorize. */
396 stmt_vec_info def_stmt_info;
397 gimple *def_stmt;
398 vect_def_type dt;
399 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
400 break;
402 /* If OP is the input of a demotion, skip over it to see whether
403 OP is itself the result of a promotion. If so, the combined
404 effect of the promotion and the demotion might fit the required
405 pattern, otherwise neither operation fits.
407 This copes with cases such as the result of an arithmetic
408 operation being truncated before being stored, and where that
409 arithmetic operation has been recognized as an over-widened one. */
410 if (TYPE_PRECISION (op_type) <= min_precision)
412 /* Use OP as the UNPROM described above if we haven't yet
413 found a promotion, or if using the new input preserves the
414 sign of the previous promotion. */
415 if (!res
416 || TYPE_PRECISION (unprom->type) == orig_precision
417 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
419 unprom->set_op (op, dt, caster);
420 min_precision = TYPE_PRECISION (op_type);
422 /* Stop if we've already seen a promotion and if this
423 conversion does more than change the sign. */
424 else if (TYPE_PRECISION (op_type)
425 != TYPE_PRECISION (unprom->type))
426 break;
428 /* The sequence now extends to OP. */
429 res = op;
432 /* See whether OP is defined by a cast. Record it as CASTER if
433 the cast is potentially vectorizable. */
434 if (!def_stmt)
435 break;
436 caster = def_stmt_info;
438 /* Ignore pattern statements, since we don't link uses for them. */
439 if (caster
440 && single_use_p
441 && !STMT_VINFO_RELATED_STMT (caster)
442 && !has_single_use (res))
443 *single_use_p = false;
445 gassign *assign = dyn_cast <gassign *> (def_stmt);
446 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
447 break;
449 /* Continue with the input to the cast. */
450 op = gimple_assign_rhs1 (def_stmt);
451 op_type = TREE_TYPE (op);
453 return res;
456 /* OP is an integer operand to an operation that returns TYPE, and we
457 want to treat the operation as a widening one. So far we can treat
458 it as widening from *COMMON_TYPE.
460 Return true if OP is suitable for such a widening operation,
461 either widening from *COMMON_TYPE or from some supertype of it.
462 Update *COMMON_TYPE to the supertype in the latter case.
464 SHIFT_P is true if OP is a shift amount. */
466 static bool
467 vect_joust_widened_integer (tree type, bool shift_p, tree op,
468 tree *common_type)
470 /* Calculate the minimum precision required by OP, without changing
471 the sign of either operand. */
472 unsigned int precision;
473 if (shift_p)
475 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
476 return false;
477 precision = TREE_INT_CST_LOW (op);
479 else
481 precision = wi::min_precision (wi::to_widest (op),
482 TYPE_SIGN (*common_type));
483 if (precision * 2 > TYPE_PRECISION (type))
484 return false;
487 /* If OP requires a wider type, switch to that type. The checks
488 above ensure that this is still narrower than the result. */
489 precision = vect_element_precision (precision);
490 if (TYPE_PRECISION (*common_type) < precision)
491 *common_type = build_nonstandard_integer_type
492 (precision, TYPE_UNSIGNED (*common_type));
493 return true;
496 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
497 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
499 static bool
500 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
502 if (types_compatible_p (*common_type, new_type))
503 return true;
505 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
506 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
507 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
508 return true;
510 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
511 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
512 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
514 *common_type = new_type;
515 return true;
518 /* We have mismatched signs, with the signed type being
519 no wider than the unsigned type. In this case we need
520 a wider signed type. */
521 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
522 TYPE_PRECISION (new_type));
523 precision *= 2;
524 if (precision * 2 > TYPE_PRECISION (type))
525 return false;
527 *common_type = build_nonstandard_integer_type (precision, false);
528 return true;
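/* Worked example (added for illustration, not part of the original source):
   with a 32-bit result TYPE, joining *COMMON_TYPE == signed char with
   NEW_TYPE == unsigned char hits the mismatched-sign case above: the
   precision becomes MAX (8, 8) * 2 == 16, which still satisfies
   16 * 2 <= 32, so *COMMON_TYPE is updated to a signed 16-bit type and the
   function returns true.  */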
531 /* Check whether STMT_INFO can be viewed as a tree of integer operations
532 in which each node either performs CODE or WIDENED_CODE, and where
533 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
534 specifies the maximum number of leaf operands. SHIFT_P says whether
535 CODE and WIDENED_CODE are some sort of shift.
537 If STMT_INFO is such a tree, return the number of leaf operands
538 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
539 to a type that (a) is narrower than the result of STMT_INFO and
540 (b) can hold all leaf operand values.
542 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
543 exists. */
545 static unsigned int
546 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
547 tree_code widened_code, bool shift_p,
548 unsigned int max_nops,
549 vect_unpromoted_value *unprom, tree *common_type)
551 /* Check for an integer operation with the right code. */
552 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
553 if (!assign)
554 return 0;
556 tree_code rhs_code = gimple_assign_rhs_code (assign);
557 if (rhs_code != code && rhs_code != widened_code)
558 return 0;
560 tree type = gimple_expr_type (assign);
561 if (!INTEGRAL_TYPE_P (type))
562 return 0;
564 /* Assume that both operands will be leaf operands. */
565 max_nops -= 2;
567 /* Check the operands. */
568 unsigned int next_op = 0;
569 for (unsigned int i = 0; i < 2; ++i)
571 vect_unpromoted_value *this_unprom = &unprom[next_op];
572 unsigned int nops = 1;
573 tree op = gimple_op (assign, i + 1);
574 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
576 /* We already have a common type from earlier operands.
577 Update it to account for OP. */
578 this_unprom->set_op (op, vect_constant_def);
579 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
580 return 0;
582 else
584 /* Only allow shifts by constants. */
585 if (shift_p && i == 1)
586 return 0;
588 if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
589 return 0;
591 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
593 /* The operand isn't widened. If STMT_INFO has the code
594 for an unwidened operation, recursively check whether
595 this operand is a node of the tree. */
596 if (rhs_code != code
597 || max_nops == 0
598 || this_unprom->dt != vect_internal_def)
599 return 0;
601 /* Give back the leaf slot allocated above now that we're
602 not treating this as a leaf operand. */
603 max_nops += 1;
605 /* Recursively process the definition of the operand. */
606 stmt_vec_info def_stmt_info
607 = vinfo->lookup_def (this_unprom->op);
608 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
609 widened_code, shift_p, max_nops,
610 this_unprom, common_type);
611 if (nops == 0)
612 return 0;
614 max_nops -= nops;
616 else
618 /* Make sure that the operand is narrower than the result. */
619 if (TYPE_PRECISION (this_unprom->type) * 2
620 > TYPE_PRECISION (type))
621 return 0;
623 /* Update COMMON_TYPE for the new operand. */
624 if (i == 0)
625 *common_type = this_unprom->type;
626 else if (!vect_joust_widened_type (type, this_unprom->type,
627 common_type))
628 return 0;
631 next_op += nops;
633 return next_op;
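/* Illustrative use (added comment, not part of the original source): the
   dot-product detection below calls this function with CODE == MULT_EXPR,
   WIDENED_CODE == WIDEN_MULT_EXPR and MAX_NOPS == 2.  Given gimple like

     _1 = (int) x_t;      // x_t of type short
     _2 = (int) y_t;      // y_t of type short
     prod = _1 * _2;

   starting from PROD it returns 2, describes x_t and y_t in UNPROM[0..1],
   and sets *COMMON_TYPE to short.  */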
636 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
637 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
639 static tree
640 vect_recog_temp_ssa_var (tree type, gimple *stmt)
642 return make_temp_ssa_name (type, stmt, "patt");
645 /* STMT2_INFO describes a type conversion that could be split into STMT1
646 followed by a version of STMT2_INFO that takes NEW_RHS as its first
647 input. Try to do this using pattern statements, returning true on
648 success. */
650 static bool
651 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
652 gimple *stmt1, tree vectype)
654 if (is_pattern_stmt_p (stmt2_info))
656 /* STMT2_INFO is part of a pattern. Get the statement to which
657 the pattern is attached. */
658 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
659 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
661 if (dump_enabled_p ())
662 dump_printf_loc (MSG_NOTE, vect_location,
663 "Splitting pattern statement: %G", stmt2_info->stmt);
665 /* Since STMT2_INFO is a pattern statement, we can change it
666 in-situ without worrying about changing the code for the
667 containing block. */
668 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
670 if (dump_enabled_p ())
672 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
673 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
674 stmt2_info->stmt);
677 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
678 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
679 /* STMT2_INFO is the actual pattern statement. Add STMT1
680 to the end of the definition sequence. */
681 gimple_seq_add_stmt_without_update (def_seq, stmt1);
682 else
684 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
685 before it. */
686 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
687 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
689 return true;
691 else
693 /* STMT2_INFO doesn't yet have a pattern. Try to create a
694 two-statement pattern now. */
695 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
696 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
697 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
698 if (!lhs_vectype)
699 return false;
701 if (dump_enabled_p ())
702 dump_printf_loc (MSG_NOTE, vect_location,
703 "Splitting statement: %G", stmt2_info->stmt);
705 /* Add STMT1 as a singleton pattern definition sequence. */
706 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
707 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
708 gimple_seq_add_stmt_without_update (def_seq, stmt1);
710 /* Build the second of the two pattern statements. */
711 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
712 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
713 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
715 if (dump_enabled_p ())
717 dump_printf_loc (MSG_NOTE, vect_location,
718 "into pattern statements: %G", stmt1);
719 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
722 return true;
726 /* Convert UNPROM to TYPE and return the result, adding new statements
727 to STMT_INFO's pattern definition statements if no better way is
728 available. VECTYPE is the vector form of TYPE. */
730 static tree
731 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
732 vect_unpromoted_value *unprom, tree vectype)
734 /* Check for a no-op conversion. */
735 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
736 return unprom->op;
738 /* Allow the caller to create constant vect_unpromoted_values. */
739 if (TREE_CODE (unprom->op) == INTEGER_CST)
740 return wide_int_to_tree (type, wi::to_widest (unprom->op));
742 tree input = unprom->op;
743 if (unprom->caster)
745 tree lhs = gimple_get_lhs (unprom->caster->stmt);
746 tree lhs_type = TREE_TYPE (lhs);
748 /* If the result of the existing cast is the right width, use it
749 instead of the source of the cast. */
750 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
751 input = lhs;
752 /* If the precision we want is between the source and result
753 precisions of the existing cast, try splitting the cast into
754 two and tapping into a mid-way point. */
755 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
756 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
758 /* In order to preserve the semantics of the original cast,
759 give the mid-way point the same signedness as the input value.
761 It would be possible to use a signed type here instead if
762 TYPE is signed and UNPROM->TYPE is unsigned, but that would
763 make the sign of the midtype sensitive to the order in
764 which we process the statements, since the signedness of
765 TYPE is the signedness required by just one of possibly
766 many users. Also, unsigned promotions are usually as cheap
767 as or cheaper than signed ones, so it's better to keep an
768 unsigned promotion. */
769 tree midtype = build_nonstandard_integer_type
770 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
771 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
772 if (vec_midtype)
774 input = vect_recog_temp_ssa_var (midtype, NULL);
775 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
776 unprom->op);
777 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
778 vec_midtype))
779 append_pattern_def_seq (vinfo, stmt_info,
780 new_stmt, vec_midtype);
784 /* See if we can reuse an existing result. */
785 if (types_compatible_p (type, TREE_TYPE (input)))
786 return input;
789 /* We need a new conversion statement. */
790 tree new_op = vect_recog_temp_ssa_var (type, NULL);
791 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
793 /* If OP is an external value, see if we can insert the new statement
794 on an incoming edge. */
795 if (input == unprom->op && unprom->dt == vect_external_def)
796 if (edge e = vect_get_external_def_edge (vinfo, input))
798 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
799 gcc_assert (!new_bb);
800 return new_op;
803 /* As a (common) last resort, add the statement to the pattern itself. */
804 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
805 return new_op;
808 /* Invoke vect_convert_input for N elements of UNPROM and store the
809 result in the corresponding elements of RESULT. */
811 static void
812 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
813 tree *result, tree type, vect_unpromoted_value *unprom,
814 tree vectype)
816 for (unsigned int i = 0; i < n; ++i)
818 unsigned int j;
819 for (j = 0; j < i; ++j)
820 if (unprom[j].op == unprom[i].op)
821 break;
822 if (j < i)
823 result[i] = result[j];
824 else
825 result[i] = vect_convert_input (vinfo, stmt_info,
826 type, &unprom[i], vectype);
830 /* The caller has created a (possibly empty) sequence of pattern definition
831 statements followed by a single statement PATTERN_STMT. Cast the result
832 of this final statement to TYPE. If a new statement is needed, add
833 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
834 and return the new statement, otherwise return PATTERN_STMT as-is.
835 VECITYPE is the vector form of PATTERN_STMT's result type. */
837 static gimple *
838 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
839 gimple *pattern_stmt, tree vecitype)
841 tree lhs = gimple_get_lhs (pattern_stmt);
842 if (!types_compatible_p (type, TREE_TYPE (lhs)))
844 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
845 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
846 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
848 return pattern_stmt;
851 /* Return true if STMT_VINFO describes a reduction for which reassociation
852 is allowed. If STMT_INFO is part of a group, assume that it's part of
853 a reduction chain and optimistically assume that all statements
854 except the last allow reassociation.
855 Also require it to have code CODE and to be a reduction
856 in the outermost loop. When returning true, store the operands in
857 *OP0_OUT and *OP1_OUT. */
859 static bool
860 vect_reassociating_reduction_p (vec_info *vinfo,
861 stmt_vec_info stmt_info, tree_code code,
862 tree *op0_out, tree *op1_out)
864 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
865 if (!loop_info)
866 return false;
868 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
869 if (!assign || gimple_assign_rhs_code (assign) != code)
870 return false;
872 /* We don't allow changing the order of the computation in the inner-loop
873 when doing outer-loop vectorization. */
874 class loop *loop = LOOP_VINFO_LOOP (loop_info);
875 if (loop && nested_in_vect_loop_p (loop, stmt_info))
876 return false;
878 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
880 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
881 code))
882 return false;
884 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
885 return false;
887 *op0_out = gimple_assign_rhs1 (assign);
888 *op1_out = gimple_assign_rhs2 (assign);
889 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
890 std::swap (*op0_out, *op1_out);
891 return true;
894 /* Function vect_recog_dot_prod_pattern
896 Try to find the following pattern:
898 type x_t, y_t;
899 TYPE1 prod;
900 TYPE2 sum = init;
901 loop:
902 sum_0 = phi <init, sum_1>
903 S1 x_t = ...
904 S2 y_t = ...
905 S3 x_T = (TYPE1) x_t;
906 S4 y_T = (TYPE1) y_t;
907 S5 prod = x_T * y_T;
908 [S6 prod = (TYPE2) prod; #optional]
909 S7 sum_1 = prod + sum_0;
911 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
 912    same size as 'TYPE1' or bigger. This is a special case of a reduction
913 computation.
915 Input:
917 * STMT_VINFO: The stmt from which the pattern search begins. In the
918 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
919 will be detected.
921 Output:
923 * TYPE_OUT: The type of the output of this pattern.
925 * Return value: A new stmt that will be used to replace the sequence of
926 stmts that constitute the pattern. In this case it will be:
927 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
929 Note: The dot-prod idiom is a widening reduction pattern that is
930 vectorized without preserving all the intermediate results. It
931 produces only N/2 (widened) results (by summing up pairs of
932 intermediate results) rather than all N results. Therefore, we
933 cannot allow this pattern when we want to get all the results and in
934 the correct order (as is the case when this computation is in an
 935    inner-loop nested in an outer-loop that is being vectorized). */
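/* Illustrative scalar source for the pattern above (added example, not part
   of the original file; the function and array names are placeholders):

     int
     dot_prod (const short *x, const short *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x[i] * y[i];   // shorts promoted to int, multiplied, summed
       return sum;
     }

   On targets with a signed dot-product optab this loop can be vectorized
   using DOT_PROD_EXPR instead of separate widening multiplies and adds.  */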
937 static gimple *
938 vect_recog_dot_prod_pattern (vec_info *vinfo,
939 stmt_vec_info stmt_vinfo, tree *type_out)
941 tree oprnd0, oprnd1;
942 gimple *last_stmt = stmt_vinfo->stmt;
943 tree type, half_type;
944 gimple *pattern_stmt;
945 tree var;
947 /* Look for the following pattern
948 DX = (TYPE1) X;
949 DY = (TYPE1) Y;
950 DPROD = DX * DY;
951 DDPROD = (TYPE2) DPROD;
952 sum_1 = DDPROD + sum_0;
953 In which
954 - DX is double the size of X
955 - DY is double the size of Y
956 - DX, DY, DPROD all have the same type
 957       - sum is the same size as DPROD or bigger
958 - sum has been recognized as a reduction variable.
960 This is equivalent to:
961 DPROD = X w* Y; #widen mult
962 sum_1 = DPROD w+ sum_0; #widen summation
964 DPROD = X w* Y; #widen mult
965 sum_1 = DPROD + sum_0; #summation
968 /* Starting from LAST_STMT, follow the defs of its uses in search
969 of the above pattern. */
971 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
972 &oprnd0, &oprnd1))
973 return NULL;
975 type = gimple_expr_type (last_stmt);
977 vect_unpromoted_value unprom_mult;
978 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
980 /* So far so good. Since last_stmt was detected as a (summation) reduction,
981 we know that oprnd1 is the reduction variable (defined by a loop-header
982 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
983 Left to check that oprnd0 is defined by a (widen_)mult_expr */
984 if (!oprnd0)
985 return NULL;
987 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
988 if (!mult_vinfo)
989 return NULL;
 991   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
992 inside the loop (in case we are analyzing an outer-loop). */
993 vect_unpromoted_value unprom0[2];
994 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
995 false, 2, unprom0, &half_type))
996 return NULL;
998 /* If there are two widening operations, make sure they agree on
999 the sign of the extension. */
1000 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1001 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
1002 return NULL;
1004 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1006 tree half_vectype;
1007 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1008 type_out, &half_vectype))
1009 return NULL;
1011 /* Get the inputs in the appropriate types. */
1012 tree mult_oprnd[2];
1013 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1014 unprom0, half_vectype);
1016 var = vect_recog_temp_ssa_var (type, NULL);
1017 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1018 mult_oprnd[0], mult_oprnd[1], oprnd1);
1020 return pattern_stmt;
1024 /* Function vect_recog_sad_pattern
1026 Try to find the following Sum of Absolute Difference (SAD) pattern:
1028 type x_t, y_t;
1029 signed TYPE1 diff, abs_diff;
1030 TYPE2 sum = init;
1031 loop:
1032 sum_0 = phi <init, sum_1>
1033 S1 x_t = ...
1034 S2 y_t = ...
1035 S3 x_T = (TYPE1) x_t;
1036 S4 y_T = (TYPE1) y_t;
1037 S5 diff = x_T - y_T;
1038 S6 abs_diff = ABS_EXPR <diff>;
1039 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1040 S8 sum_1 = abs_diff + sum_0;
1042 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
 1043    same size as 'TYPE1' or bigger. This is a special case of a reduction
1044 computation.
1046 Input:
1048 * STMT_VINFO: The stmt from which the pattern search begins. In the
1049 example, when this function is called with S8, the pattern
1050 {S3,S4,S5,S6,S7,S8} will be detected.
1052 Output:
1054 * TYPE_OUT: The type of the output of this pattern.
1056 * Return value: A new stmt that will be used to replace the sequence of
1057 stmts that constitute the pattern. In this case it will be:
 1058         SAD_EXPR <x_t, y_t, sum_0>  */
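/* Illustrative scalar source for the SAD pattern above (added example, not
   part of the original file; names are placeholders):

     int
     sad (const unsigned char *x, const unsigned char *y, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
         {
           int diff = x[i] - y[i];          // widened subtraction
           sum += diff < 0 ? -diff : diff;  // ABS of the difference
         }
       return sum;
     }

   With a usad/ssad optab this loop can be vectorized using SAD_EXPR.  */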
1061 static gimple *
1062 vect_recog_sad_pattern (vec_info *vinfo,
1063 stmt_vec_info stmt_vinfo, tree *type_out)
1065 gimple *last_stmt = stmt_vinfo->stmt;
1066 tree half_type;
1068 /* Look for the following pattern
1069 DX = (TYPE1) X;
1070 DY = (TYPE1) Y;
1071 DDIFF = DX - DY;
1072 DAD = ABS_EXPR <DDIFF>;
 1073      DAD = (TYPE2) DAD;        # optional widening of the ABS result
1074 sum_1 = DAD + sum_0;
1075 In which
1076 - DX is at least double the size of X
1077 - DY is at least double the size of Y
1078 - DX, DY, DDIFF, DAD all have the same type
 1079      - sum is the same size as DAD or bigger
1080 - sum has been recognized as a reduction variable.
1082 This is equivalent to:
1083 DDIFF = X w- Y; #widen sub
1084 DAD = ABS_EXPR <DDIFF>;
1085 sum_1 = DAD w+ sum_0; #widen summation
1087 DDIFF = X w- Y; #widen sub
1088 DAD = ABS_EXPR <DDIFF>;
1089 sum_1 = DAD + sum_0; #summation
1092 /* Starting from LAST_STMT, follow the defs of its uses in search
1093 of the above pattern. */
1095 tree plus_oprnd0, plus_oprnd1;
1096 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1097 &plus_oprnd0, &plus_oprnd1))
1098 return NULL;
1100 tree sum_type = gimple_expr_type (last_stmt);
1102 /* Any non-truncating sequence of conversions is OK here, since
1103 with a successful match, the result of the ABS(U) is known to fit
1104 within the nonnegative range of the result type. (It cannot be the
1105 negative of the minimum signed value due to the range of the widening
1106 MINUS_EXPR.) */
1107 vect_unpromoted_value unprom_abs;
1108 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1109 &unprom_abs);
1111 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1112 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1113 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1114 Then check that plus_oprnd0 is defined by an abs_expr. */
1116 if (!plus_oprnd0)
1117 return NULL;
1119 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1120 if (!abs_stmt_vinfo)
1121 return NULL;
 1123   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
1124 inside the loop (in case we are analyzing an outer-loop). */
1125 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1126 if (!abs_stmt
1127 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1128 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1129 return NULL;
1131 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1132 tree abs_type = TREE_TYPE (abs_oprnd);
1133 if (TYPE_UNSIGNED (abs_type))
1134 return NULL;
1136 /* Peel off conversions from the ABS input. This can involve sign
1137 changes (e.g. from an unsigned subtraction to a signed ABS input)
1138 or signed promotion, but it can't include unsigned promotion.
1139 (Note that ABS of an unsigned promotion should have been folded
1140 away before now anyway.) */
1141 vect_unpromoted_value unprom_diff;
1142 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1143 &unprom_diff);
1144 if (!abs_oprnd)
1145 return NULL;
1146 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1147 && TYPE_UNSIGNED (unprom_diff.type))
1148 return NULL;
1150 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1151 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1152 if (!diff_stmt_vinfo)
1153 return NULL;
 1155   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
1156 inside the loop (in case we are analyzing an outer-loop). */
1157 vect_unpromoted_value unprom[2];
1158 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1159 false, 2, unprom, &half_type))
1160 return NULL;
1162 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1164 tree half_vectype;
1165 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1166 type_out, &half_vectype))
1167 return NULL;
1169 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1170 tree sad_oprnd[2];
1171 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1172 unprom, half_vectype);
1174 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1175 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1176 sad_oprnd[1], plus_oprnd1);
1178 return pattern_stmt;
1181 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1182 so that it can be treated as though it had the form:
1184 A_TYPE a;
1185 B_TYPE b;
1186 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1187 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1188 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1189 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1190 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1192 Try to replace the pattern with:
1194 A_TYPE a;
1195 B_TYPE b;
1196 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1197 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1198 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1199 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1201 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1203 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1204 name of the pattern being matched, for dump purposes. */
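/* Illustrative example for the transformation above (added, not part of the
   original file): with 16-bit shorts a and b,

     int res = (int) a * (int) b;

   can be rewritten to compute the product directly as a widening multiply
   (WIDEN_MULT_EXPR), avoiding the explicit promotions of the inputs.  */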
1206 static gimple *
1207 vect_recog_widen_op_pattern (vec_info *vinfo,
1208 stmt_vec_info last_stmt_info, tree *type_out,
1209 tree_code orig_code, tree_code wide_code,
1210 bool shift_p, const char *name)
1212 gimple *last_stmt = last_stmt_info->stmt;
1214 vect_unpromoted_value unprom[2];
1215 tree half_type;
1216 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1217 shift_p, 2, unprom, &half_type))
1218 return NULL;
1220 /* Pattern detected. */
1221 vect_pattern_detected (name, last_stmt);
1223 tree type = gimple_expr_type (last_stmt);
1224 tree itype = type;
1225 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1226 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1227 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1228 TYPE_UNSIGNED (half_type));
1230 /* Check target support */
1231 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1232 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1233 enum tree_code dummy_code;
1234 int dummy_int;
1235 auto_vec<tree> dummy_vec;
1236 if (!vectype
1237 || !vecitype
1238 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1239 vecitype, vectype,
1240 &dummy_code, &dummy_code,
1241 &dummy_int, &dummy_vec))
1242 return NULL;
1244 *type_out = get_vectype_for_scalar_type (vinfo, type);
1245 if (!*type_out)
1246 return NULL;
1248 tree oprnd[2];
1249 vect_convert_inputs (vinfo, last_stmt_info,
1250 2, oprnd, half_type, unprom, vectype);
1252 tree var = vect_recog_temp_ssa_var (itype, NULL);
1253 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1254 oprnd[0], oprnd[1]);
1256 return vect_convert_output (vinfo, last_stmt_info,
1257 type, pattern_stmt, vecitype);
1260 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1261 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1263 static gimple *
1264 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1265 tree *type_out)
1267 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1268 MULT_EXPR, WIDEN_MULT_EXPR, false,
1269 "vect_recog_widen_mult_pattern");
1272 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1273 to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1275 static gimple *
1276 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1277 tree *type_out)
1279 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1280 PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1281 "vect_recog_widen_plus_pattern");
1284 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1285 to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1286 static gimple *
1287 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1288 tree *type_out)
1290 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1291 MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1292 "vect_recog_widen_minus_pattern");
1295 /* Function vect_recog_pow_pattern
1297 Try to find the following pattern:
1299 x = POW (y, N);
1301 with POW being one of pow, powf, powi, powif and N being
1302 either 2 or 0.5.
1304 Input:
1306 * STMT_VINFO: The stmt from which the pattern search begins.
1308 Output:
1310 * TYPE_OUT: The type of the output of this pattern.
1312 * Return value: A new stmt that will be used to replace the sequence of
1313 stmts that constitute the pattern. In this case it will be:
1314 x = x * x
 1316         x = sqrt (x)  */
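/* Illustrative examples (added, not part of the original file):

     y = pow (x, 2.0);   // becomes y = x * x
     z = pow (x, 0.5);   // becomes z = sqrt (x), if IFN_SQRT is supported

   Both replacements vectorize as an ordinary multiplication or a vector
   square-root instruction instead of a libm call.  */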
1319 static gimple *
1320 vect_recog_pow_pattern (vec_info *vinfo,
1321 stmt_vec_info stmt_vinfo, tree *type_out)
1323 gimple *last_stmt = stmt_vinfo->stmt;
1324 tree base, exp;
1325 gimple *stmt;
1326 tree var;
1328 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1329 return NULL;
1331 switch (gimple_call_combined_fn (last_stmt))
1333 CASE_CFN_POW:
1334 CASE_CFN_POWI:
1335 break;
1337 default:
1338 return NULL;
1341 base = gimple_call_arg (last_stmt, 0);
1342 exp = gimple_call_arg (last_stmt, 1);
1343 if (TREE_CODE (exp) != REAL_CST
1344 && TREE_CODE (exp) != INTEGER_CST)
1346 if (flag_unsafe_math_optimizations
1347 && TREE_CODE (base) == REAL_CST
1348 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1350 combined_fn log_cfn;
1351 built_in_function exp_bfn;
1352 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1354 case BUILT_IN_POW:
1355 log_cfn = CFN_BUILT_IN_LOG;
1356 exp_bfn = BUILT_IN_EXP;
1357 break;
1358 case BUILT_IN_POWF:
1359 log_cfn = CFN_BUILT_IN_LOGF;
1360 exp_bfn = BUILT_IN_EXPF;
1361 break;
1362 case BUILT_IN_POWL:
1363 log_cfn = CFN_BUILT_IN_LOGL;
1364 exp_bfn = BUILT_IN_EXPL;
1365 break;
1366 default:
1367 return NULL;
1369 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1370 tree exp_decl = builtin_decl_implicit (exp_bfn);
1371 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1372 does that, but if C is a power of 2, we want to use
1373 exp2 (log2 (C) * x) in the non-vectorized version, but for
1374 vectorization we don't have vectorized exp2. */
1375 if (logc
1376 && TREE_CODE (logc) == REAL_CST
1377 && exp_decl
1378 && lookup_attribute ("omp declare simd",
1379 DECL_ATTRIBUTES (exp_decl)))
1381 cgraph_node *node = cgraph_node::get_create (exp_decl);
1382 if (node->simd_clones == NULL)
1384 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1385 || node->definition)
1386 return NULL;
1387 expand_simd_clones (node);
1388 if (node->simd_clones == NULL)
1389 return NULL;
1391 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1392 if (!*type_out)
1393 return NULL;
1394 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1395 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1396 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1397 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1398 g = gimple_build_call (exp_decl, 1, def);
1399 gimple_call_set_lhs (g, res);
1400 return g;
1404 return NULL;
1407 /* We now have a pow or powi builtin function call with a constant
1408 exponent. */
1410 /* Catch squaring. */
1411 if ((tree_fits_shwi_p (exp)
1412 && tree_to_shwi (exp) == 2)
1413 || (TREE_CODE (exp) == REAL_CST
1414 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1416 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1417 TREE_TYPE (base), type_out))
1418 return NULL;
1420 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1421 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1422 return stmt;
1425 /* Catch square root. */
1426 if (TREE_CODE (exp) == REAL_CST
1427 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1429 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1430 if (*type_out
1431 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1432 OPTIMIZE_FOR_SPEED))
1434 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1435 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1436 gimple_call_set_lhs (stmt, var);
1437 gimple_call_set_nothrow (stmt, true);
1438 return stmt;
1442 return NULL;
1446 /* Function vect_recog_widen_sum_pattern
1448 Try to find the following pattern:
1450 type x_t;
1451 TYPE x_T, sum = init;
1452 loop:
1453 sum_0 = phi <init, sum_1>
1454 S1 x_t = *p;
1455 S2 x_T = (TYPE) x_t;
1456 S3 sum_1 = x_T + sum_0;
 1458    where type 'TYPE' is at least double the size of type 'type', i.e. we're
1459 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1460 a special case of a reduction computation.
1462 Input:
1464 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1465 when this function is called with S3, the pattern {S2,S3} will be detected.
1467 Output:
1469 * TYPE_OUT: The type of the output of this pattern.
1471 * Return value: A new stmt that will be used to replace the sequence of
1472 stmts that constitute the pattern. In this case it will be:
1473 WIDEN_SUM <x_t, sum_0>
1475 Note: The widening-sum idiom is a widening reduction pattern that is
1476 vectorized without preserving all the intermediate results. It
1477 produces only N/2 (widened) results (by summing up pairs of
1478 intermediate results) rather than all N results. Therefore, we
1479 cannot allow this pattern when we want to get all the results and in
1480 the correct order (as is the case when this computation is in an
 1481    inner-loop nested in an outer-loop that is being vectorized). */
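/* Illustrative scalar source for the widening-sum pattern above (added
   example, not part of the original file; names are placeholders):

     unsigned int
     sum_u16 (const unsigned short *x, int n)
     {
       unsigned int sum = 0;
       for (int i = 0; i < n; i++)
         sum += x[i];   // each element is widened before the addition
       return sum;
     }

   With target support (e.g. a widen_usum pattern) the accumulation is
   vectorized as WIDEN_SUM_EXPR.  */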
1483 static gimple *
1484 vect_recog_widen_sum_pattern (vec_info *vinfo,
1485 stmt_vec_info stmt_vinfo, tree *type_out)
1487 gimple *last_stmt = stmt_vinfo->stmt;
1488 tree oprnd0, oprnd1;
1489 tree type;
1490 gimple *pattern_stmt;
1491 tree var;
1493 /* Look for the following pattern
1494 DX = (TYPE) X;
1495 sum_1 = DX + sum_0;
1496 In which DX is at least double the size of X, and sum_1 has been
1497 recognized as a reduction variable.
1500 /* Starting from LAST_STMT, follow the defs of its uses in search
1501 of the above pattern. */
1503 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1504 &oprnd0, &oprnd1))
1505 return NULL;
1507 type = gimple_expr_type (last_stmt);
1509 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1510 we know that oprnd1 is the reduction variable (defined by a loop-header
1511 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1512 Left to check that oprnd0 is defined by a cast from type 'type' to type
1513 'TYPE'. */
1515 vect_unpromoted_value unprom0;
1516 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1517 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1518 return NULL;
1520 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1522 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1523 unprom0.type, type_out))
1524 return NULL;
1526 var = vect_recog_temp_ssa_var (type, NULL);
1527 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1529 return pattern_stmt;
1532 /* Recognize cases in which an operation is performed in one type WTYPE
1533 but could be done more efficiently in a narrower type NTYPE. For example,
1534 if we have:
1536 ATYPE a; // narrower than NTYPE
1537 BTYPE b; // narrower than NTYPE
1538 WTYPE aw = (WTYPE) a;
1539 WTYPE bw = (WTYPE) b;
1540 WTYPE res = aw + bw; // only uses of aw and bw
1542 then it would be more efficient to do:
1544 NTYPE an = (NTYPE) a;
1545 NTYPE bn = (NTYPE) b;
1546 NTYPE resn = an + bn;
1547 WTYPE res = (WTYPE) resn;
1549 Other situations include things like:
1551 ATYPE a; // NTYPE or narrower
1552 WTYPE aw = (WTYPE) a;
1553 WTYPE res = aw + b;
1555 when only "(NTYPE) res" is significant. In that case it's more efficient
1556 to truncate "b" and do the operation on NTYPE instead:
1558 NTYPE an = (NTYPE) a;
1559 NTYPE bn = (NTYPE) b; // truncation
1560 NTYPE resn = an + bn;
1561 WTYPE res = (WTYPE) resn;
1563 All users of "res" should then use "resn" instead, making the final
1564 statement dead (not marked as relevant). The final statement is still
1565 needed to maintain the type correctness of the IR.
1567 vect_determine_precisions has already determined the minimum
 1568    precision of the operation and the minimum precision required
1569 by users of the result. */
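/* Illustrative scalar source for the over-widening case above (added
   example, not part of the original file; names are placeholders):

     void
     avg_u8 (unsigned char *r, const unsigned char *a,
             const unsigned char *b, int n)
     {
       for (int i = 0; i < n; i++)
         r[i] = (a[i] + b[i]) >> 1;   // C promotes to int before adding
     }

   The addition is performed in int in the source, yet only the low 8 bits
   of the shifted result are stored, so the operation can be narrowed to a
   16-bit element type for vectorization.  */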
1571 static gimple *
1572 vect_recog_over_widening_pattern (vec_info *vinfo,
1573 stmt_vec_info last_stmt_info, tree *type_out)
1575 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1576 if (!last_stmt)
1577 return NULL;
1579 /* See whether we have found that this operation can be done on a
1580 narrower type without changing its semantics. */
1581 unsigned int new_precision = last_stmt_info->operation_precision;
1582 if (!new_precision)
1583 return NULL;
1585 tree lhs = gimple_assign_lhs (last_stmt);
1586 tree type = TREE_TYPE (lhs);
1587 tree_code code = gimple_assign_rhs_code (last_stmt);
1589 /* Punt for reductions where we don't handle the type conversions. */
1590 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1591 return NULL;
1593 /* Keep the first operand of a COND_EXPR as-is: only the other two
1594 operands are interesting. */
1595 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1597 /* Check the operands. */
1598 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1599 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1600 unprom.quick_grow (nops);
1601 unsigned int min_precision = 0;
1602 bool single_use_p = false;
1603 for (unsigned int i = 0; i < nops; ++i)
1605 tree op = gimple_op (last_stmt, first_op + i);
1606 if (TREE_CODE (op) == INTEGER_CST)
1607 unprom[i].set_op (op, vect_constant_def);
1608 else if (TREE_CODE (op) == SSA_NAME)
1610 bool op_single_use_p = true;
1611 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1612 &op_single_use_p))
1613 return NULL;
1614 /* If:
1616 (1) N bits of the result are needed;
1617 (2) all inputs are widened from M<N bits; and
1618 (3) one operand OP is a single-use SSA name
1620 we can shift the M->N widening from OP to the output
1621 without changing the number or type of extensions involved.
1622 This then reduces the number of copies of STMT_INFO.
1624 If instead of (3) more than one operand is a single-use SSA name,
1625 shifting the extension to the output is even more of a win.
1627 If instead:
1629 (1) N bits of the result are needed;
1630 (2) one operand OP2 is widened from M2<N bits;
1631 (3) another operand OP1 is widened from M1<M2 bits; and
1632 (4) both OP1 and OP2 are single-use
1634 the choice is between:
1636 (a) truncating OP2 to M1, doing the operation on M1,
1637 and then widening the result to N
1639 (b) widening OP1 to M2, doing the operation on M2, and then
1640 widening the result to N
1642 Both shift the M2->N widening of the inputs to the output.
1643 (a) additionally shifts the M1->M2 widening to the output;
1644 it requires fewer copies of STMT_INFO but requires an extra
1645 M2->M1 truncation.
1647 Which is better will depend on the complexity and cost of
1648 STMT_INFO, which is hard to predict at this stage. However,
1649 a clear tie-breaker in favor of (b) is the fact that the
1650 truncation in (a) increases the length of the operation chain.
1652 If instead of (4) only one of OP1 or OP2 is single-use,
1653 (b) is still a win over doing the operation in N bits:
1654 it still shifts the M2->N widening on the single-use operand
1655 to the output and reduces the number of STMT_INFO copies.
1657 If neither operand is single-use then operating on fewer than
1658 N bits might lead to more extensions overall. Whether it does
1659 or not depends on global information about the vectorization
1660 region, and whether that's a good trade-off would again
1661 depend on the complexity and cost of the statements involved,
1662 as well as things like register pressure that are not normally
1663 modelled at this stage. We therefore ignore these cases
1664 and just optimize the clear single-use wins above.
1666 Thus we take the maximum precision of the unpromoted operands
1667 and record whether any operand is single-use. */
1668 if (unprom[i].dt == vect_internal_def)
1670 min_precision = MAX (min_precision,
1671 TYPE_PRECISION (unprom[i].type));
1672 single_use_p |= op_single_use_p;
1675 else
1676 return NULL;
1679 /* Although the operation could be done in operation_precision, we have
1680 to balance that against introducing extra truncations or extensions.
1681 Calculate the minimum precision that can be handled efficiently.
1683 The loop above determined that the operation could be handled
1684 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1685 extension from the inputs to the output without introducing more
1686 instructions, and would reduce the number of instructions required
1687 for STMT_INFO itself.
1689 vect_determine_precisions has also determined that the result only
1690 needs min_output_precision bits. Truncating by a factor of N times
1691 requires a tree of N - 1 instructions, so if TYPE is N times wider
1692 than min_output_precision, doing the operation in TYPE and truncating
1693 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1694 In contrast:
1696 - truncating the input to a unary operation and doing the operation
1697 in the new type requires at most N - 1 + 1 = N instructions per
1698 output vector
1700 - doing the same for a binary operation requires at most
1701 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1703 Both unary and binary operations require fewer instructions than
1704 this if the operands were extended from a suitable truncated form.
1705 Thus there is usually nothing to lose by doing operations in
1706 min_output_precision bits, but there can be something to gain. */
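  /* For illustration, take N = 4 in the counts above: doing the operation
     in TYPE and truncating costs 4 + 3 = 7 instructions per output vector,
     versus at most 3 + 1 = 4 for a unary operation done in the truncated
     type and 3 * 2 + 1 = 7 for a binary operation.  */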
1707 if (!single_use_p)
1708 min_precision = last_stmt_info->min_output_precision;
1709 else
1710 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1712 /* Apply the minimum efficient precision we just calculated. */
1713 if (new_precision < min_precision)
1714 new_precision = min_precision;
1715 new_precision = vect_element_precision (new_precision);
1716 if (new_precision >= TYPE_PRECISION (type))
1717 return NULL;
1719 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1721 *type_out = get_vectype_for_scalar_type (vinfo, type);
1722 if (!*type_out)
1723 return NULL;
1725 /* We've found a viable pattern. Get the new type of the operation. */
1726 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1727 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1729 /* If we're truncating an operation, we need to make sure that we
1730 don't introduce new undefined overflow. The codes tested here are
1731 a subset of those accepted by vect_truncatable_operation_p. */
1732 tree op_type = new_type;
1733 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1734 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1735 op_type = build_nonstandard_integer_type (new_precision, true);
1737 /* We specifically don't check here whether the target supports the
1738 new operation, since it might be something that a later pattern
1739 wants to rewrite anyway. If targets have a minimum element size
1740 for some optabs, we should pattern-match smaller ops to larger ops
1741 where beneficial. */
1742 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1743 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1744 if (!new_vectype || !op_vectype)
1745 return NULL;
1747 if (dump_enabled_p ())
1748 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1749 type, new_type);
1751 /* Calculate the rhs operands for an operation on OP_TYPE. */
1752 tree ops[3] = {};
1753 for (unsigned int i = 1; i < first_op; ++i)
1754 ops[i - 1] = gimple_op (last_stmt, i);
1755 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
1756 op_type, &unprom[0], op_vectype);
1758 /* Use the operation to produce a result of type OP_TYPE. */
1759 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1760 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1761 ops[0], ops[1], ops[2]);
1762 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1764 if (dump_enabled_p ())
1765 dump_printf_loc (MSG_NOTE, vect_location,
1766 "created pattern stmt: %G", pattern_stmt);
1768 /* Convert back to the original signedness, if OP_TYPE is different
1769 from NEW_TYPE. */
1770 if (op_type != new_type)
1771 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
1772 pattern_stmt, op_vectype);
1774 /* Promote the result to the original type. */
1775 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
1776 pattern_stmt, new_vectype);
1778 return pattern_stmt;
1781 /* Recognize the following patterns:
1783 ATYPE a; // narrower than TYPE
1784 BTYPE b; // narrower than TYPE
1786 1) Multiply high with scaling
1787 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1788 2) ... or also with rounding
1789 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1791 where only the bottom half of res is used. */
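/* As a hedged, concrete sketch of the two forms above, assume 16-bit
   inputs widened to a 32-bit TYPE (variable names are illustrative only):

     int16_t a, b;
     int32_t prod = (int32_t) a * (int32_t) b;
     int32_t res1 = prod >> 15;               // 1) scaling
     int32_t res2 = ((prod >> 14) + 1) >> 1;  // 2) scaling with rounding

   with only the low 16 bits of res1/res2 consumed; these map to IFN_MULHS
   and IFN_MULHRS respectively when the target supports them.  */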
1793 static gimple *
1794 vect_recog_mulhs_pattern (vec_info *vinfo,
1795 stmt_vec_info last_stmt_info, tree *type_out)
1797 /* Check for a right shift. */
1798 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1799 if (!last_stmt
1800 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1801 return NULL;
1803 /* Check that the shift result is wider than the users of the
1804 result need (i.e. that narrowing would be a natural choice). */
1805 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1806 unsigned int target_precision
1807 = vect_element_precision (last_stmt_info->min_output_precision);
1808 if (!INTEGRAL_TYPE_P (lhs_type)
1809 || target_precision >= TYPE_PRECISION (lhs_type))
1810 return NULL;
1812 /* Look through any change in sign on the outer shift input. */
1813 vect_unpromoted_value unprom_rshift_input;
1814 tree rshift_input = vect_look_through_possible_promotion
1815 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1816 if (!rshift_input
1817 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1818 != TYPE_PRECISION (lhs_type))
1819 return NULL;
1821 /* Get the definition of the shift input. */
1822 stmt_vec_info rshift_input_stmt_info
1823 = vect_get_internal_def (vinfo, rshift_input);
1824 if (!rshift_input_stmt_info)
1825 return NULL;
1826 gassign *rshift_input_stmt
1827 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1828 if (!rshift_input_stmt)
1829 return NULL;
1831 stmt_vec_info mulh_stmt_info;
1832 tree scale_term;
1833 internal_fn ifn;
1834 unsigned int expect_offset;
1836 /* Check for the presence of the rounding term. */
1837 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1839 /* Check that the outer shift was by 1. */
1840 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1841 return NULL;
1843 /* Check that the second operand of the PLUS_EXPR is 1. */
1844 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1845 return NULL;
1847 /* Look through any change in sign on the addition input. */
1848 vect_unpromoted_value unprom_plus_input;
1849 tree plus_input = vect_look_through_possible_promotion
1850 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1851 if (!plus_input
1852 || TYPE_PRECISION (TREE_TYPE (plus_input))
1853 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
1854 return NULL;
1856 /* Get the definition of the multiply-high-scale part. */
1857 stmt_vec_info plus_input_stmt_info
1858 = vect_get_internal_def (vinfo, plus_input);
1859 if (!plus_input_stmt_info)
1860 return NULL;
1861 gassign *plus_input_stmt
1862 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1863 if (!plus_input_stmt
1864 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1865 return NULL;
1867 /* Look through any change in sign on the scaling input. */
1868 vect_unpromoted_value unprom_scale_input;
1869 tree scale_input = vect_look_through_possible_promotion
1870 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1871 if (!scale_input
1872 || TYPE_PRECISION (TREE_TYPE (scale_input))
1873 != TYPE_PRECISION (TREE_TYPE (plus_input)))
1874 return NULL;
1876 /* Get the definition of the multiply-high part. */
1877 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1878 if (!mulh_stmt_info)
1879 return NULL;
1881 /* Get the scaling term. */
1882 scale_term = gimple_assign_rhs2 (plus_input_stmt);
1884 expect_offset = target_precision + 2;
1885 ifn = IFN_MULHRS;
1887 else
1889 mulh_stmt_info = rshift_input_stmt_info;
1890 scale_term = gimple_assign_rhs2 (last_stmt);
1892 expect_offset = target_precision + 1;
1893 ifn = IFN_MULHS;
1896 /* Check that the scaling factor is correct. */
1897 if (TREE_CODE (scale_term) != INTEGER_CST
1898 || wi::to_widest (scale_term) + expect_offset
1899 != TYPE_PRECISION (lhs_type))
1900 return NULL;
1902 /* Check whether the scaling input term can be seen as two widened
1903 inputs multiplied together. */
1904 vect_unpromoted_value unprom_mult[2];
1905 tree new_type;
1906 unsigned int nops
1907 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1908 false, 2, unprom_mult, &new_type);
1909 if (nops != 2)
1910 return NULL;
1912 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1914 /* Adjust output precision. */
1915 if (TYPE_PRECISION (new_type) < target_precision)
1916 new_type = build_nonstandard_integer_type
1917 (target_precision, TYPE_UNSIGNED (new_type));
1919 /* Check for target support. */
1920 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1921 if (!new_vectype
1922 || !direct_internal_fn_supported_p
1923 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1924 return NULL;
1926 /* The IR requires a valid vector type for the cast result, even though
1927 it's likely to be discarded. */
1928 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
1929 if (!*type_out)
1930 return NULL;
1932 /* Generate the IFN_MULHS/IFN_MULHRS call. */
1933 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1934 tree new_ops[2];
1935 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
1936 unprom_mult, new_vectype);
1937 gcall *mulhrs_stmt
1938 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1939 gimple_call_set_lhs (mulhrs_stmt, new_var);
1940 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_NOTE, vect_location,
1944 "created pattern stmt: %G", mulhrs_stmt);
1946 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
1947 mulhrs_stmt, new_vectype);
1950 /* Recognize the patterns:
1952 ATYPE a; // narrower than TYPE
1953 BTYPE b; // narrower than TYPE
1954 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1955 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1957 where only the bottom half of avg is used. Try to transform them into:
1959 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1960 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1962 followed by:
1964 TYPE avg = (TYPE) avg';
1966 where NTYPE is no wider than half of TYPE. Since only the bottom half
1967 of avg is used, all or part of the cast of avg' should become redundant.
1969 If there is no target support available, generate code to distribute rshift
1970 over plus and add a carry. */
1972 static gimple *
1973 vect_recog_average_pattern (vec_info *vinfo,
1974 stmt_vec_info last_stmt_info, tree *type_out)
1976 /* Check for a shift right by one bit. */
1977 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1978 if (!last_stmt
1979 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1980 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1981 return NULL;
1983 /* Check that the shift result is wider than the users of the
1984 result need (i.e. that narrowing would be a natural choice). */
1985 tree lhs = gimple_assign_lhs (last_stmt);
1986 tree type = TREE_TYPE (lhs);
1987 unsigned int target_precision
1988 = vect_element_precision (last_stmt_info->min_output_precision);
1989 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1990 return NULL;
1992 /* Look through any change in sign on the shift input. */
1993 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1994 vect_unpromoted_value unprom_plus;
1995 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1996 &unprom_plus);
1997 if (!rshift_rhs
1998 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1999 return NULL;
2001 /* Get the definition of the shift input. */
2002 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2003 if (!plus_stmt_info)
2004 return NULL;
2006 /* Check whether the shift input can be seen as a tree of additions on
2007 2 or 3 widened inputs.
2009 Note that the pattern should be a win even if the result of one or
2010 more additions is reused elsewhere: if the pattern matches, we'd be
2011 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2012 internal_fn ifn = IFN_AVG_FLOOR;
2013 vect_unpromoted_value unprom[3];
2014 tree new_type;
2015 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2016 WIDEN_PLUS_EXPR, false, 3,
2017 unprom, &new_type);
2018 if (nops == 0)
2019 return NULL;
2020 if (nops == 3)
2022 /* Check that one operand is 1. */
2023 unsigned int i;
2024 for (i = 0; i < 3; ++i)
2025 if (integer_onep (unprom[i].op))
2026 break;
2027 if (i == 3)
2028 return NULL;
2029 /* Throw away the 1 operand and keep the other two. */
2030 if (i < 2)
2031 unprom[i] = unprom[2];
2032 ifn = IFN_AVG_CEIL;
2035 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2037 /* We know that:
2039 (a) the operation can be viewed as:
2041 TYPE widened0 = (TYPE) UNPROM[0];
2042 TYPE widened1 = (TYPE) UNPROM[1];
2043 TYPE tmp1 = widened0 + widened1 {+ 1};
2044 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2046 (b) the first two statements are equivalent to:
2048 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2049 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2051 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2052 where sensible;
2054 (d) all the operations can be performed correctly at twice the width of
2055 NEW_TYPE, due to the nature of the average operation; and
2057 (e) users of the result of the right shift need only TARGET_PRECISION
2058 bits, where TARGET_PRECISION is no more than half of TYPE's
2059 precision.
2061 Under these circumstances, the only situation in which NEW_TYPE
2062 could be narrower than TARGET_PRECISION is if widened0, widened1
2063 and an addition result are all used more than once. Thus we can
2064 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2065 as "free", whereas widening the result of the average instruction
2066 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2067 therefore better not to go narrower than TARGET_PRECISION. */
2068 if (TYPE_PRECISION (new_type) < target_precision)
2069 new_type = build_nonstandard_integer_type (target_precision,
2070 TYPE_UNSIGNED (new_type));
2072 /* Check for target support. */
2073 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2074 if (!new_vectype)
2075 return NULL;
2077 bool fallback_p = false;
2079 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2081 else if (TYPE_UNSIGNED (new_type)
2082 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2083 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2084 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2085 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2086 fallback_p = true;
2087 else
2088 return NULL;
2090 /* The IR requires a valid vector type for the cast result, even though
2091 it's likely to be discarded. */
2092 *type_out = get_vectype_for_scalar_type (vinfo, type);
2093 if (!*type_out)
2094 return NULL;
2096 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2097 tree new_ops[2];
2098 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2099 unprom, new_vectype);
2101 if (fallback_p)
2103 /* As a fallback, generate code for the following sequence:
2105 shifted_op0 = new_ops[0] >> 1;
2106 shifted_op1 = new_ops[1] >> 1;
2107 sum_of_shifted = shifted_op0 + shifted_op1;
2108 unmasked_carry = new_ops[0] and/or new_ops[1];
2109 carry = unmasked_carry & 1;
2110 new_var = sum_of_shifted + carry;
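	 The choice of BIT_AND_EXPR (floor) versus BIT_IOR_EXPR (ceil)
	 below follows from the identities:

	   (a + b) >> 1     == (a >> 1) + (b >> 1) + ((a & b) & 1)
	   (a + b + 1) >> 1 == (a >> 1) + (b >> 1) + ((a | b) & 1)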
2113 tree one_cst = build_one_cst (new_type);
2114 gassign *g;
2116 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2117 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2118 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2120 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2121 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2122 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2124 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2125 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2126 shifted_op0, shifted_op1);
2127 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2129 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2130 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2131 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2132 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2134 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2135 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2136 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2138 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2139 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2142 /* Generate the IFN_AVG* call. */
2143 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2144 new_ops[1]);
2145 gimple_call_set_lhs (average_stmt, new_var);
2146 gimple_set_location (average_stmt, gimple_location (last_stmt));
2148 if (dump_enabled_p ())
2149 dump_printf_loc (MSG_NOTE, vect_location,
2150 "created pattern stmt: %G", average_stmt);
2152 return vect_convert_output (vinfo, last_stmt_info,
2153 type, average_stmt, new_vectype);
2156 /* Recognize cases in which the input to a cast is wider than its
2157 output, and the input is fed by a widening operation. Fold this
2158 by removing the unnecessary intermediate widening. E.g.:
2160 unsigned char a;
2161 unsigned int b = (unsigned int) a;
2162 unsigned short c = (unsigned short) b;
2164 would become:
2166 unsigned short c = (unsigned short) a;
2168 Although this is rare in input IR, it is an expected side-effect
2169 of the over-widening pattern above.
2171 This is beneficial also for integer-to-float conversions, if the
2172 widened integer has more bits than the float, and if the unwidened
2173 input doesn't. */
2175 static gimple *
2176 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2177 stmt_vec_info last_stmt_info, tree *type_out)
2179 /* Check for a cast, including an integer-to-float conversion. */
2180 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2181 if (!last_stmt)
2182 return NULL;
2183 tree_code code = gimple_assign_rhs_code (last_stmt);
2184 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2185 return NULL;
2187 /* Make sure that the rhs is a scalar with a natural bitsize. */
2188 tree lhs = gimple_assign_lhs (last_stmt);
2189 if (!lhs)
2190 return NULL;
2191 tree lhs_type = TREE_TYPE (lhs);
2192 scalar_mode lhs_mode;
2193 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2194 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2195 return NULL;
2197 /* Check for a narrowing operation (from a vector point of view). */
2198 tree rhs = gimple_assign_rhs1 (last_stmt);
2199 tree rhs_type = TREE_TYPE (rhs);
2200 if (!INTEGRAL_TYPE_P (rhs_type)
2201 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2202 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2203 return NULL;
2205 /* Try to find an unpromoted input. */
2206 vect_unpromoted_value unprom;
2207 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2208 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2209 return NULL;
2211 /* If the bits above RHS_TYPE matter, make sure that they're the
2212 same when extending from UNPROM as they are when extending from RHS. */
2213 if (!INTEGRAL_TYPE_P (lhs_type)
2214 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2215 return NULL;
2217 /* We can get the same result by casting UNPROM directly, to avoid
2218 the unnecessary widening and narrowing. */
2219 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2221 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2222 if (!*type_out)
2223 return NULL;
2225 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2226 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2227 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2229 return pattern_stmt;
2232 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2233 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2235 static gimple *
2236 vect_recog_widen_shift_pattern (vec_info *vinfo,
2237 stmt_vec_info last_stmt_info, tree *type_out)
2239 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2240 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2241 "vect_recog_widen_shift_pattern");
2244 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
2246 type a_t, b_t, c_t;
2248 S0 a_t = b_t r<< c_t;
2250 Input/Output:
2252 * STMT_VINFO: The stmt from which the pattern search begins,
2253 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2254 with a sequence:
2256 S1 d_t = -c_t;
2257 S2 e_t = d_t & (B - 1);
2258 S3 f_t = b_t << c_t;
2259 S4 g_t = b_t >> e_t;
2260 S0 a_t = f_t | g_t;
2262 where B is element bitsize of type.
2264 Output:
2266 * TYPE_OUT: The type of the output of this pattern.
2268 * Return value: A new stmt that will be used to replace the rotate
2269 S0 stmt. */
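/* For 32-bit unsigned elements (B == 32) the sequence above amounts to the
   usual scalar rotate idiom (an illustrative sketch):

     a_t = (b_t << c_t) | (b_t >> (-c_t & 31));  */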
2271 static gimple *
2272 vect_recog_rotate_pattern (vec_info *vinfo,
2273 stmt_vec_info stmt_vinfo, tree *type_out)
2275 gimple *last_stmt = stmt_vinfo->stmt;
2276 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2277 gimple *pattern_stmt, *def_stmt;
2278 enum tree_code rhs_code;
2279 enum vect_def_type dt;
2280 optab optab1, optab2;
2281 edge ext_def = NULL;
2282 bool bswap16_p = false;
2284 if (is_gimple_assign (last_stmt))
2286 rhs_code = gimple_assign_rhs_code (last_stmt);
2287 switch (rhs_code)
2289 case LROTATE_EXPR:
2290 case RROTATE_EXPR:
2291 break;
2292 default:
2293 return NULL;
2296 lhs = gimple_assign_lhs (last_stmt);
2297 oprnd0 = gimple_assign_rhs1 (last_stmt);
2298 type = TREE_TYPE (oprnd0);
2299 oprnd1 = gimple_assign_rhs2 (last_stmt);
2301 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2303 /* __builtin_bswap16 (x) is another form of x r>> 8.
2304 The vectorizer has bswap support, but only if the argument isn't
2305 promoted. */
2306 lhs = gimple_call_lhs (last_stmt);
2307 oprnd0 = gimple_call_arg (last_stmt, 0);
2308 type = TREE_TYPE (oprnd0);
2309 if (!lhs
2310 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2311 || TYPE_PRECISION (type) <= 16
2312 || TREE_CODE (oprnd0) != SSA_NAME
2313 || BITS_PER_UNIT != 8
2314 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2315 return NULL;
2317 stmt_vec_info def_stmt_info;
2318 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2319 return NULL;
2321 if (dt != vect_internal_def)
2322 return NULL;
2324 if (gimple_assign_cast_p (def_stmt))
2326 def = gimple_assign_rhs1 (def_stmt);
2327 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2328 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2329 oprnd0 = def;
2332 type = TREE_TYPE (lhs);
2333 vectype = get_vectype_for_scalar_type (vinfo, type);
2334 if (vectype == NULL_TREE)
2335 return NULL;
2337 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2339 /* The encoding uses one stepped pattern for each byte in the
2340 16-bit word. */
2341 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2342 for (unsigned i = 0; i < 3; ++i)
2343 for (unsigned j = 0; j < 2; ++j)
2344 elts.quick_push ((i + 1) * 2 - j - 1);
2346 vec_perm_indices indices (elts, 1,
2347 TYPE_VECTOR_SUBPARTS (char_vectype));
2348 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2350 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2351 undo the argument promotion. */
2352 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2354 def = vect_recog_temp_ssa_var (type, NULL);
2355 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2356 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2357 oprnd0 = def;
2360 /* Pattern detected. */
2361 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2363 *type_out = vectype;
2365 /* Pattern supported. Create a stmt to be used to replace the
2366 pattern, with the unpromoted argument. */
2367 var = vect_recog_temp_ssa_var (type, NULL);
2368 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2369 1, oprnd0);
2370 gimple_call_set_lhs (pattern_stmt, var);
2371 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2372 gimple_call_fntype (last_stmt));
2373 return pattern_stmt;
2377 oprnd1 = build_int_cst (integer_type_node, 8);
2378 rhs_code = LROTATE_EXPR;
2379 bswap16_p = true;
2381 else
2382 return NULL;
2384 if (TREE_CODE (oprnd0) != SSA_NAME
2385 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2386 || !INTEGRAL_TYPE_P (type)
2387 || !TYPE_UNSIGNED (type))
2388 return NULL;
2390 stmt_vec_info def_stmt_info;
2391 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2392 return NULL;
2394 if (dt != vect_internal_def
2395 && dt != vect_constant_def
2396 && dt != vect_external_def)
2397 return NULL;
2399 vectype = get_vectype_for_scalar_type (vinfo, type);
2400 if (vectype == NULL_TREE)
2401 return NULL;
2403 /* If vector/vector or vector/scalar rotate is supported by the target,
2404 don't do anything here. */
2405 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2406 if (optab1
2407 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2409 use_rotate:
2410 if (bswap16_p)
2412 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2414 def = vect_recog_temp_ssa_var (type, NULL);
2415 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2416 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2417 oprnd0 = def;
2420 /* Pattern detected. */
2421 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2423 *type_out = vectype;
2425 /* Pattern supported. Create a stmt to be used to replace the
2426 pattern. */
2427 var = vect_recog_temp_ssa_var (type, NULL);
2428 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2429 oprnd1);
2430 return pattern_stmt;
2432 return NULL;
2435 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2437 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2438 if (optab2
2439 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2440 goto use_rotate;
2443 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2444 don't do anything here either. */
2445 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2446 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2447 if (!optab1
2448 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2449 || !optab2
2450 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2452 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2453 return NULL;
2454 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2455 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2456 if (!optab1
2457 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2458 || !optab2
2459 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2460 return NULL;
2463 *type_out = vectype;
2465 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2467 def = vect_recog_temp_ssa_var (type, NULL);
2468 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2469 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2470 oprnd0 = def;
2473 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2474 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2476 def = NULL_TREE;
2477 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2478 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2479 def = oprnd1;
2480 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2482 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2483 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2484 && TYPE_PRECISION (TREE_TYPE (rhs1))
2485 == TYPE_PRECISION (type))
2486 def = rhs1;
2489 if (def == NULL_TREE)
2491 def = vect_recog_temp_ssa_var (type, NULL);
2492 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2493 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2495 stype = TREE_TYPE (def);
2497 if (TREE_CODE (def) == INTEGER_CST)
2499 if (!tree_fits_uhwi_p (def)
2500 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2501 || integer_zerop (def))
2502 return NULL;
2503 def2 = build_int_cst (stype,
2504 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2506 else
2508 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2510 if (vecstype == NULL_TREE)
2511 return NULL;
2512 def2 = vect_recog_temp_ssa_var (stype, NULL);
2513 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2514 if (ext_def)
2516 basic_block new_bb
2517 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2518 gcc_assert (!new_bb);
2520 else
2521 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2523 def2 = vect_recog_temp_ssa_var (stype, NULL);
2524 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2525 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2526 gimple_assign_lhs (def_stmt), mask);
2527 if (ext_def)
2529 basic_block new_bb
2530 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2531 gcc_assert (!new_bb);
2533 else
2534 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2537 var1 = vect_recog_temp_ssa_var (type, NULL);
2538 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2539 ? LSHIFT_EXPR : RSHIFT_EXPR,
2540 oprnd0, def);
2541 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2543 var2 = vect_recog_temp_ssa_var (type, NULL);
2544 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2545 ? RSHIFT_EXPR : LSHIFT_EXPR,
2546 oprnd0, def2);
2547 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2549 /* Pattern detected. */
2550 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2552 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2553 var = vect_recog_temp_ssa_var (type, NULL);
2554 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2556 return pattern_stmt;
2559 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2560 vectorized:
2562 type a_t;
2563 TYPE b_T, res_T;
2565 S1 a_t = ;
2566 S2 b_T = ;
2567 S3 res_T = b_T op a_t;
2569 where type 'TYPE' is a type with different size than 'type',
2570 and op is <<, >> or rotate.
2572 Also detect cases:
2574 type a_t;
2575 TYPE b_T, c_T, res_T;
2577 S0 c_T = ;
2578 S1 a_t = (type) c_T;
2579 S2 b_T = ;
2580 S3 res_T = b_T op a_t;
2582 Input/Output:
2584 * STMT_VINFO: The stmt from which the pattern search begins,
2585 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2586 with a shift/rotate which has same type on both operands, in the
2587 second case just b_T op c_T, in the first case with added cast
2588 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2590 Output:
2592 * TYPE_OUT: The type of the output of this pattern.
2594 * Return value: A new stmt that will be used to replace the shift/rotate
2595 S3 stmt. */
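/* For instance (an illustrative sketch), with a 32-bit shift amount and a
   64-bit shifted value:

     int a_t;
     long long b_T, res_T;
     res_T = b_T << a_t;

   the pattern gives the shift amount b_T's type, adding the conversion (or
   the possibly masked reuse of c_T) to the pattern def sequence.  */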
2597 static gimple *
2598 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2599 stmt_vec_info stmt_vinfo,
2600 tree *type_out)
2602 gimple *last_stmt = stmt_vinfo->stmt;
2603 tree oprnd0, oprnd1, lhs, var;
2604 gimple *pattern_stmt;
2605 enum tree_code rhs_code;
2607 if (!is_gimple_assign (last_stmt))
2608 return NULL;
2610 rhs_code = gimple_assign_rhs_code (last_stmt);
2611 switch (rhs_code)
2613 case LSHIFT_EXPR:
2614 case RSHIFT_EXPR:
2615 case LROTATE_EXPR:
2616 case RROTATE_EXPR:
2617 break;
2618 default:
2619 return NULL;
2622 lhs = gimple_assign_lhs (last_stmt);
2623 oprnd0 = gimple_assign_rhs1 (last_stmt);
2624 oprnd1 = gimple_assign_rhs2 (last_stmt);
2625 if (TREE_CODE (oprnd0) != SSA_NAME
2626 || TREE_CODE (oprnd1) != SSA_NAME
2627 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2628 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2629 || TYPE_PRECISION (TREE_TYPE (lhs))
2630 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2631 return NULL;
2633 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2634 if (!def_vinfo)
2635 return NULL;
2637 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2638 if (*type_out == NULL_TREE)
2639 return NULL;
2641 tree def = NULL_TREE;
2642 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2643 if (def_stmt && gimple_assign_cast_p (def_stmt))
2645 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2646 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2647 && TYPE_PRECISION (TREE_TYPE (rhs1))
2648 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2650 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2651 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2652 def = rhs1;
2653 else
2655 tree mask
2656 = build_low_bits_mask (TREE_TYPE (rhs1),
2657 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2658 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2659 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2660 tree vecstype = get_vectype_for_scalar_type (vinfo,
2661 TREE_TYPE (rhs1));
2662 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2667 if (def == NULL_TREE)
2669 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2670 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2671 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2674 /* Pattern detected. */
2675 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2677 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2678 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2679 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2681 return pattern_stmt;
2684 /* Return true iff the target has a vector optab implementing the operation
2685 CODE on type VECTYPE. */
2687 static bool
2688 target_has_vecop_for_code (tree_code code, tree vectype)
2690 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2691 return voptab
2692 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2695 /* Verify that the target has optabs of VECTYPE to perform all the steps
2696 needed by the multiplication-by-immediate synthesis algorithm described by
2697 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2698 present. Return true iff the target supports all the steps. */
2700 static bool
2701 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2702 tree vectype, bool synth_shift_p)
2704 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2705 return false;
2707 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2708 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2710 if (var == negate_variant
2711 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2712 return false;
2714 /* If we must synthesize shifts with additions make sure that vector
2715 addition is available. */
2716 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2717 return false;
2719 for (int i = 1; i < alg->ops; i++)
2721 switch (alg->op[i])
2723 case alg_shift:
2724 break;
2725 case alg_add_t_m2:
2726 case alg_add_t2_m:
2727 case alg_add_factor:
2728 if (!supports_vplus)
2729 return false;
2730 break;
2731 case alg_sub_t_m2:
2732 case alg_sub_t2_m:
2733 case alg_sub_factor:
2734 if (!supports_vminus)
2735 return false;
2736 break;
2737 case alg_unknown:
2738 case alg_m:
2739 case alg_zero:
2740 case alg_impossible:
2741 return false;
2742 default:
2743 gcc_unreachable ();
2747 return true;
2750 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2751 putting the final result in DEST. Append all statements but the last into
2752 VINFO. Return the last statement. */
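/* E.g. for AMNT == 3 the generated sequence is (with illustrative names):

     t1   = op + op;     // op << 1
     t2   = t1 + t1;     // op << 2
     dest = t2 + t2;     // op << 3  */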
2754 static gimple *
2755 synth_lshift_by_additions (vec_info *vinfo,
2756 tree dest, tree op, HOST_WIDE_INT amnt,
2757 stmt_vec_info stmt_info)
2759 HOST_WIDE_INT i;
2760 tree itype = TREE_TYPE (op);
2761 tree prev_res = op;
2762 gcc_assert (amnt >= 0);
2763 for (i = 0; i < amnt; i++)
2765 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2766 : dest;
2767 gimple *stmt
2768 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2769 prev_res = tmp_var;
2770 if (i < amnt - 1)
2771 append_pattern_def_seq (vinfo, stmt_info, stmt);
2772 else
2773 return stmt;
2775 gcc_unreachable ();
2776 return NULL;
2779 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2780 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2781 the process if necessary. Append the resulting assignment statements
2782 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2783 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2784 left shifts using additions. */
2786 static tree
2787 apply_binop_and_append_stmt (vec_info *vinfo,
2788 tree_code code, tree op1, tree op2,
2789 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2791 if (integer_zerop (op2)
2792 && (code == LSHIFT_EXPR
2793 || code == PLUS_EXPR))
2795 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2796 return op1;
2799 gimple *stmt;
2800 tree itype = TREE_TYPE (op1);
2801 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2803 if (code == LSHIFT_EXPR
2804 && synth_shift_p)
2806 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
2807 TREE_INT_CST_LOW (op2), stmt_vinfo);
2808 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2809 return tmp_var;
2812 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2813 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2814 return tmp_var;
2817 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2818 and simple arithmetic operations to be vectorized. Record the statements
2819 produced in STMT_VINFO and return the last statement in the sequence or
2820 NULL if it's not possible to synthesize such a multiplication.
2821 This function mirrors the behavior of expand_mult_const in expmed.c but
2822 works on tree-ssa form. */
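/* For example (a hedged sketch; the actual sequence is whatever
   choose_mult_variant selects), a multiplication by 10 could be
   synthesized as:

     t1  = op << 2;      // op * 4
     t2  = t1 + op;      // op * 5
     res = t2 << 1;      // op * 10  */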
2824 static gimple *
2825 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
2826 stmt_vec_info stmt_vinfo)
2828 tree itype = TREE_TYPE (op);
2829 machine_mode mode = TYPE_MODE (itype);
2830 struct algorithm alg;
2831 mult_variant variant;
2832 if (!tree_fits_shwi_p (val))
2833 return NULL;
2835 /* Multiplication synthesis by shifts, adds and subs can introduce
2836 signed overflow where the original operation didn't. Perform the
2837 operations on an unsigned type and cast back to avoid this.
2838 In the future we may want to relax this for synthesis algorithms
2839 that we can prove do not cause unexpected overflow. */
2840 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2842 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2844 /* Targets that don't support vector shifts but support vector additions
2845 can synthesize shifts that way. */
2846 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
2848 HOST_WIDE_INT hwval = tree_to_shwi (val);
2849 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2850 The vectorizer's benefit analysis will decide whether it's beneficial
2851 to do this. */
2852 bool possible = choose_mult_variant (mode, hwval, &alg,
2853 &variant, MAX_COST);
2854 if (!possible)
2855 return NULL;
2857 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
2859 if (!vectype
2860 || !target_supports_mult_synth_alg (&alg, variant,
2861 vectype, synth_shift_p))
2862 return NULL;
2864 tree accumulator;
2866 /* Clear out the sequence of statements so we can populate it below. */
2867 gimple *stmt = NULL;
2869 if (cast_to_unsigned_p)
2871 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2872 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2873 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2874 op = tmp_op;
2877 if (alg.op[0] == alg_zero)
2878 accumulator = build_int_cst (multtype, 0);
2879 else
2880 accumulator = op;
2882 bool needs_fixup = (variant == negate_variant)
2883 || (variant == add_variant);
2885 for (int i = 1; i < alg.ops; i++)
2887 tree shft_log = build_int_cst (multtype, alg.log[i]);
2888 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2889 tree tmp_var = NULL_TREE;
2891 switch (alg.op[i])
2893 case alg_shift:
2894 if (synth_shift_p)
2895 stmt
2896 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
2897 alg.log[i], stmt_vinfo);
2898 else
2899 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2900 shft_log);
2901 break;
2902 case alg_add_t_m2:
2903 tmp_var
2904 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
2905 stmt_vinfo, synth_shift_p);
2906 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2907 tmp_var);
2908 break;
2909 case alg_sub_t_m2:
2910 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
2911 shft_log, stmt_vinfo,
2912 synth_shift_p);
2913 /* In some algorithms the first step involves zeroing the
2914 accumulator. If subtracting from such an accumulator
2915 just emit the negation directly. */
2916 if (integer_zerop (accumulator))
2917 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2918 else
2919 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2920 tmp_var);
2921 break;
2922 case alg_add_t2_m:
2923 tmp_var
2924 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2925 shft_log, stmt_vinfo, synth_shift_p);
2926 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2927 break;
2928 case alg_sub_t2_m:
2929 tmp_var
2930 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2931 shft_log, stmt_vinfo, synth_shift_p);
2932 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2933 break;
2934 case alg_add_factor:
2935 tmp_var
2936 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2937 shft_log, stmt_vinfo, synth_shift_p);
2938 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2939 tmp_var);
2940 break;
2941 case alg_sub_factor:
2942 tmp_var
2943 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
2944 shft_log, stmt_vinfo, synth_shift_p);
2945 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2946 accumulator);
2947 break;
2948 default:
2949 gcc_unreachable ();
2951 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2952 but rather return it directly. */
2954 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2955 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2956 accumulator = accum_tmp;
2958 if (variant == negate_variant)
2960 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2961 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2962 accumulator = accum_tmp;
2963 if (cast_to_unsigned_p)
2964 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2966 else if (variant == add_variant)
2968 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2969 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2970 accumulator = accum_tmp;
2971 if (cast_to_unsigned_p)
2972 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2974 /* Move back to a signed type if needed. */
2975 if (cast_to_unsigned_p)
2977 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2978 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2981 return stmt;
2984 /* Detect multiplication by constant and convert it into a sequence of
2985 shifts and additions, subtractions, negations. We reuse the
2986 choose_mult_variant algorithms from expmed.c
2988 Input/Output:
2990 STMT_VINFO: The stmt from which the pattern search begins,
2991 i.e. the mult stmt.
2993 Output:
2995 * TYPE_OUT: The type of the output of this pattern.
2997 * Return value: A new stmt that will be used to replace
2998 the multiplication. */
3000 static gimple *
3001 vect_recog_mult_pattern (vec_info *vinfo,
3002 stmt_vec_info stmt_vinfo, tree *type_out)
3004 gimple *last_stmt = stmt_vinfo->stmt;
3005 tree oprnd0, oprnd1, vectype, itype;
3006 gimple *pattern_stmt;
3008 if (!is_gimple_assign (last_stmt))
3009 return NULL;
3011 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3012 return NULL;
3014 oprnd0 = gimple_assign_rhs1 (last_stmt);
3015 oprnd1 = gimple_assign_rhs2 (last_stmt);
3016 itype = TREE_TYPE (oprnd0);
3018 if (TREE_CODE (oprnd0) != SSA_NAME
3019 || TREE_CODE (oprnd1) != INTEGER_CST
3020 || !INTEGRAL_TYPE_P (itype)
3021 || !type_has_mode_precision_p (itype))
3022 return NULL;
3024 vectype = get_vectype_for_scalar_type (vinfo, itype);
3025 if (vectype == NULL_TREE)
3026 return NULL;
3028 /* If the target can handle vectorized multiplication natively,
3029 don't attempt to optimize this. */
3030 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3031 if (mul_optab != unknown_optab)
3033 machine_mode vec_mode = TYPE_MODE (vectype);
3034 int icode = (int) optab_handler (mul_optab, vec_mode);
3035 if (icode != CODE_FOR_nothing)
3036 return NULL;
3039 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3040 oprnd0, oprnd1, stmt_vinfo);
3041 if (!pattern_stmt)
3042 return NULL;
3044 /* Pattern detected. */
3045 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3047 *type_out = vectype;
3049 return pattern_stmt;
3052 /* Detect a signed division by a constant that wouldn't be
3053 otherwise vectorized:
3055 type a_t, b_t;
3057 S1 a_t = b_t / N;
3059 where type 'type' is an integral type and N is a constant.
3061 Similarly handle modulo by a constant:
3063 S4 a_t = b_t % N;
3065 Input/Output:
3067 * STMT_VINFO: The stmt from which the pattern search begins,
3068 i.e. the division stmt. S1 is replaced, if N is a power of two
3069 constant and the type is signed, by:
3070 S3 y_t = b_t < 0 ? N - 1 : 0;
3071 S2 x_t = b_t + y_t;
3072 S1' a_t = x_t >> log2 (N);
3074 S4 is replaced, if N is a power of two constant and the type is
3075 signed, by the following (where *_T temporaries have unsigned type):
3076 S9 y_T = b_t < 0 ? -1U : 0U;
3077 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3078 S7 z_t = (type) z_T;
3079 S6 w_t = b_t + z_t;
3080 S5 x_t = w_t & (N - 1);
3081 S4' a_t = x_t - z_t;
3083 Output:
3085 * TYPE_OUT: The type of the output of this pattern.
3087 * Return value: A new stmt that will be used to replace the division
3088 S1 or modulo S4 stmt. */
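/* As a concrete illustration of the division case (a sketch with N == 4
   and a signed type):

     S1  a_t = b_t / 4;

   becomes

     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;  */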
3090 static gimple *
3091 vect_recog_divmod_pattern (vec_info *vinfo,
3092 stmt_vec_info stmt_vinfo, tree *type_out)
3094 gimple *last_stmt = stmt_vinfo->stmt;
3095 tree oprnd0, oprnd1, vectype, itype, cond;
3096 gimple *pattern_stmt, *def_stmt;
3097 enum tree_code rhs_code;
3098 optab optab;
3099 tree q;
3100 int dummy_int, prec;
3102 if (!is_gimple_assign (last_stmt))
3103 return NULL;
3105 rhs_code = gimple_assign_rhs_code (last_stmt);
3106 switch (rhs_code)
3108 case TRUNC_DIV_EXPR:
3109 case EXACT_DIV_EXPR:
3110 case TRUNC_MOD_EXPR:
3111 break;
3112 default:
3113 return NULL;
3116 oprnd0 = gimple_assign_rhs1 (last_stmt);
3117 oprnd1 = gimple_assign_rhs2 (last_stmt);
3118 itype = TREE_TYPE (oprnd0);
3119 if (TREE_CODE (oprnd0) != SSA_NAME
3120 || TREE_CODE (oprnd1) != INTEGER_CST
3121 || TREE_CODE (itype) != INTEGER_TYPE
3122 || !type_has_mode_precision_p (itype))
3123 return NULL;
3125 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3126 vectype = get_vectype_for_scalar_type (vinfo, itype);
3127 if (vectype == NULL_TREE)
3128 return NULL;
3130 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3132 /* If the target can handle vectorized division or modulo natively,
3133 don't attempt to optimize this, since native division is likely
3134 to give smaller code. */
3135 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3136 if (optab != unknown_optab)
3138 machine_mode vec_mode = TYPE_MODE (vectype);
3139 int icode = (int) optab_handler (optab, vec_mode);
3140 if (icode != CODE_FOR_nothing)
3141 return NULL;
3145 prec = TYPE_PRECISION (itype);
3146 if (integer_pow2p (oprnd1))
3148 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3149 return NULL;
3151 /* Pattern detected. */
3152 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3154 *type_out = vectype;
3156 /* Check if the target supports this internal function. */
3157 internal_fn ifn = IFN_DIV_POW2;
3158 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3160 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3162 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3163 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3164 gimple_call_set_lhs (div_stmt, var_div);
3166 if (rhs_code == TRUNC_MOD_EXPR)
3168 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3169 def_stmt
3170 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3171 LSHIFT_EXPR, var_div, shift);
3172 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3173 pattern_stmt
3174 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3175 MINUS_EXPR, oprnd0,
3176 gimple_assign_lhs (def_stmt));
3178 else
3179 pattern_stmt = div_stmt;
3180 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3182 return pattern_stmt;
3185 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3186 build_int_cst (itype, 0));
3187 if (rhs_code == TRUNC_DIV_EXPR
3188 || rhs_code == EXACT_DIV_EXPR)
3190 tree var = vect_recog_temp_ssa_var (itype, NULL);
3191 tree shift;
3192 def_stmt
3193 = gimple_build_assign (var, COND_EXPR, cond,
3194 fold_build2 (MINUS_EXPR, itype, oprnd1,
3195 build_int_cst (itype, 1)),
3196 build_int_cst (itype, 0));
3197 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3198 var = vect_recog_temp_ssa_var (itype, NULL);
3199 def_stmt
3200 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3201 gimple_assign_lhs (def_stmt));
3202 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3204 shift = build_int_cst (itype, tree_log2 (oprnd1));
3205 pattern_stmt
3206 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3207 RSHIFT_EXPR, var, shift);
3209 else
3211 tree signmask;
3212 if (compare_tree_int (oprnd1, 2) == 0)
3214 signmask = vect_recog_temp_ssa_var (itype, NULL);
3215 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3216 build_int_cst (itype, 1),
3217 build_int_cst (itype, 0));
3218 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3220 else
3222 tree utype
3223 = build_nonstandard_integer_type (prec, 1);
3224 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3225 tree shift
3226 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3227 - tree_log2 (oprnd1));
3228 tree var = vect_recog_temp_ssa_var (utype, NULL);
3230 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3231 build_int_cst (utype, -1),
3232 build_int_cst (utype, 0));
3233 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3234 var = vect_recog_temp_ssa_var (utype, NULL);
3235 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3236 gimple_assign_lhs (def_stmt),
3237 shift);
3238 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3239 signmask = vect_recog_temp_ssa_var (itype, NULL);
3240 def_stmt
3241 = gimple_build_assign (signmask, NOP_EXPR, var);
3242 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3244 def_stmt
3245 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3246 PLUS_EXPR, oprnd0, signmask);
3247 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3248 def_stmt
3249 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3250 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3251 fold_build2 (MINUS_EXPR, itype, oprnd1,
3252 build_int_cst (itype, 1)));
3253 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3255 pattern_stmt
3256 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3257 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3258 signmask);
3261 return pattern_stmt;
3264 if (prec > HOST_BITS_PER_WIDE_INT
3265 || integer_zerop (oprnd1))
3266 return NULL;
3268 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3269 return NULL;
3271 if (TYPE_UNSIGNED (itype))
3273 unsigned HOST_WIDE_INT mh, ml;
3274 int pre_shift, post_shift;
3275 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3276 & GET_MODE_MASK (itype_mode));
3277 tree t1, t2, t3, t4;
3279 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3280 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3281 return NULL;
3283 /* Find a suitable multiplier and right shift count
3284 instead of multiplying with D. */
3285 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3287 /* If the suggested multiplier is more than SIZE bits, we can do better
3288 for even divisors, using an initial right shift. */
3289 if (mh != 0 && (d & 1) == 0)
3291 pre_shift = ctz_or_zero (d);
3292 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3293 &ml, &post_shift, &dummy_int);
3294 gcc_assert (!mh);
3296 else
3297 pre_shift = 0;
3299 if (mh != 0)
3301 if (post_shift - 1 >= prec)
3302 return NULL;
3304 /* t1 = oprnd0 h* ml;
3305 t2 = oprnd0 - t1;
3306 t3 = t2 >> 1;
3307 t4 = t1 + t3;
3308 q = t4 >> (post_shift - 1); */
3309 t1 = vect_recog_temp_ssa_var (itype, NULL);
3310 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3311 build_int_cst (itype, ml));
3312 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3314 t2 = vect_recog_temp_ssa_var (itype, NULL);
3315 def_stmt
3316 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3317 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3319 t3 = vect_recog_temp_ssa_var (itype, NULL);
3320 def_stmt
3321 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3322 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3324 t4 = vect_recog_temp_ssa_var (itype, NULL);
3325 def_stmt
3326 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3328 if (post_shift != 1)
3330 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3332 q = vect_recog_temp_ssa_var (itype, NULL);
3333 pattern_stmt
3334 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3335 build_int_cst (itype, post_shift - 1));
3337 else
3339 q = t4;
3340 pattern_stmt = def_stmt;
3343 else
3345 if (pre_shift >= prec || post_shift >= prec)
3346 return NULL;
3348 /* t1 = oprnd0 >> pre_shift;
3349 t2 = t1 h* ml;
3350 q = t2 >> post_shift; */
3351 if (pre_shift)
3353 t1 = vect_recog_temp_ssa_var (itype, NULL);
3354 def_stmt
3355 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3356 build_int_cst (NULL, pre_shift));
3357 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3359 else
3360 t1 = oprnd0;
3362 t2 = vect_recog_temp_ssa_var (itype, NULL);
3363 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3364 build_int_cst (itype, ml));
3366 if (post_shift)
3368 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3370 q = vect_recog_temp_ssa_var (itype, NULL);
3371 def_stmt
3372 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3373 build_int_cst (itype, post_shift));
3375 else
3376 q = t2;
3378 pattern_stmt = def_stmt;
3381 else
3383 unsigned HOST_WIDE_INT ml;
3384 int post_shift;
3385 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3386 unsigned HOST_WIDE_INT abs_d;
3387 bool add = false;
3388 tree t1, t2, t3, t4;
3390 /* Give up for -1. */
3391 if (d == -1)
3392 return NULL;
3394 /* Since d might be INT_MIN, we have to cast to
3395 unsigned HOST_WIDE_INT before negating to avoid
3396 undefined signed overflow. */
3397 abs_d = (d >= 0
3398 ? (unsigned HOST_WIDE_INT) d
3399 : - (unsigned HOST_WIDE_INT) d);
3401 /* n rem d = n rem -d */
3402 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3404 d = abs_d;
3405 oprnd1 = build_int_cst (itype, abs_d);
3407 if (HOST_BITS_PER_WIDE_INT >= prec
3408 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3409 /* This case is not handled correctly below. */
3410 return NULL;
3412 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3413 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3415 add = true;
3416 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3418 if (post_shift >= prec)
3419 return NULL;
3421 /* t1 = oprnd0 h* ml; */
3422 t1 = vect_recog_temp_ssa_var (itype, NULL);
3423 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3424 build_int_cst (itype, ml));
3426 if (add)
3428 /* t2 = t1 + oprnd0; */
3429 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3430 t2 = vect_recog_temp_ssa_var (itype, NULL);
3431 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3433 else
3434 t2 = t1;
3436 if (post_shift)
3438 /* t3 = t2 >> post_shift; */
3439 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3440 t3 = vect_recog_temp_ssa_var (itype, NULL);
3441 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3442 build_int_cst (itype, post_shift));
3444 else
3445 t3 = t2;
3447 int msb = 1;
3448 value_range r;
3449 get_range_query (cfun)->range_of_expr (r, oprnd0);
3450 if (r.kind () == VR_RANGE)
3452 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
3453 msb = 0;
3454 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
3455 msb = -1;
3458 if (msb == 0 && d >= 0)
3460 /* q = t3; */
3461 q = t3;
3462 pattern_stmt = def_stmt;
3464 else
3466 /* t4 = oprnd0 >> (prec - 1);
3467 or if we know from VRP that oprnd0 >= 0
3468 t4 = 0;
3469 or if we know from VRP that oprnd0 < 0
3470 t4 = -1; */
3471 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3472 t4 = vect_recog_temp_ssa_var (itype, NULL);
3473 if (msb != 1)
3474 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3475 build_int_cst (itype, msb));
3476 else
3477 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3478 build_int_cst (itype, prec - 1));
3479 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3481 /* q = t3 - t4; or q = t4 - t3; */
3482 q = vect_recog_temp_ssa_var (itype, NULL);
3483 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3484 d < 0 ? t3 : t4);
3488 if (rhs_code == TRUNC_MOD_EXPR)
3490 tree r, t1;
3492 /* We divided. Now finish by:
3493 t1 = q * oprnd1;
3494 r = oprnd0 - t1; */
3495 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3497 t1 = vect_recog_temp_ssa_var (itype, NULL);
3498 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3499 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3501 r = vect_recog_temp_ssa_var (itype, NULL);
3502 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3505 /* Pattern detected. */
3506 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3508 *type_out = vectype;
3509 return pattern_stmt;
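/* Illustrative sketch, added for exposition (not part of the GCC sources):
   assuming a 32-bit signed division q = x / 3, choose_multiplier is
   expected to return ml = 0x55555556 with post_shift = 0 and no add step,
   so the signed path above would emit roughly:

     t1 = x h* 0x55555556;      // MULT_HIGHPART_EXPR
     t3 = t1;                   // post_shift == 0
     t4 = x >> 31;              // or 0 / -1 when VRP knows the sign
     q  = t3 - t4;

   For x % 3 the TRUNC_MOD_EXPR tail additionally emits t = q * 3 and
   r = x - t.  The exact constants depend on the precision and divisor.  */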
3512 /* Function vect_recog_mixed_size_cond_pattern
3514 Try to find the following pattern:
3516 type x_t, y_t;
3517 TYPE a_T, b_T, c_T;
3518 loop:
3519 S1 a_T = x_t CMP y_t ? b_T : c_T;
3521 where type 'TYPE' is an integral type which has different size
3522 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3523 than 'type', the constants need to fit into an integer type
3524 with the same width as 'type') or results of conversion from 'type'.
3526 Input:
3528 * STMT_VINFO: The stmt from which the pattern search begins.
3530 Output:
3532 * TYPE_OUT: The type of the output of this pattern.
3534 * Return value: A new stmt that will be used to replace the pattern.
3535 Additionally a def_stmt is added.
3537 a_it = x_t CMP y_t ? b_it : c_it;
3538 a_T = (TYPE) a_it; */
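/* Illustrative example, added for exposition (types assumed, not taken
   from the sources): with signed char x_t, y_t and an int destination a_T,

     S1  a_T = x_t < y_t ? 3 : 5;

   is rewritten so the COND_EXPR is evaluated in a char-sized integer
   type and only its result is widened:

     a_it = x_t < y_t ? 3 : 5;   // char-width itype
     a_T  = (int) a_it;

   keeping the comparison and selection at the narrower vector width.  */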
3540 static gimple *
3541 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3542 stmt_vec_info stmt_vinfo, tree *type_out)
3544 gimple *last_stmt = stmt_vinfo->stmt;
3545 tree cond_expr, then_clause, else_clause;
3546 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3547 gimple *pattern_stmt, *def_stmt;
3548 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3549 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3550 bool promotion;
3551 tree comp_scalar_type;
3553 if (!is_gimple_assign (last_stmt)
3554 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3555 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3556 return NULL;
3558 cond_expr = gimple_assign_rhs1 (last_stmt);
3559 then_clause = gimple_assign_rhs2 (last_stmt);
3560 else_clause = gimple_assign_rhs3 (last_stmt);
3562 if (!COMPARISON_CLASS_P (cond_expr))
3563 return NULL;
3565 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3566 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3567 if (comp_vectype == NULL_TREE)
3568 return NULL;
3570 type = gimple_expr_type (last_stmt);
3571 if (types_compatible_p (type, comp_scalar_type)
3572 || ((TREE_CODE (then_clause) != INTEGER_CST
3573 || TREE_CODE (else_clause) != INTEGER_CST)
3574 && !INTEGRAL_TYPE_P (comp_scalar_type))
3575 || !INTEGRAL_TYPE_P (type))
3576 return NULL;
3578 if ((TREE_CODE (then_clause) != INTEGER_CST
3579 && !type_conversion_p (vinfo, then_clause, false,
3580 &orig_type0, &def_stmt0, &promotion))
3581 || (TREE_CODE (else_clause) != INTEGER_CST
3582 && !type_conversion_p (vinfo, else_clause, false,
3583 &orig_type1, &def_stmt1, &promotion)))
3584 return NULL;
3586 if (orig_type0 && orig_type1
3587 && !types_compatible_p (orig_type0, orig_type1))
3588 return NULL;
3590 if (orig_type0)
3592 if (!types_compatible_p (orig_type0, comp_scalar_type))
3593 return NULL;
3594 then_clause = gimple_assign_rhs1 (def_stmt0);
3595 itype = orig_type0;
3598 if (orig_type1)
3600 if (!types_compatible_p (orig_type1, comp_scalar_type))
3601 return NULL;
3602 else_clause = gimple_assign_rhs1 (def_stmt1);
3603 itype = orig_type1;
3607 HOST_WIDE_INT cmp_mode_size
3608 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3610 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3611 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3612 return NULL;
3614 vectype = get_vectype_for_scalar_type (vinfo, type);
3615 if (vectype == NULL_TREE)
3616 return NULL;
3618 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3619 return NULL;
3621 if (itype == NULL_TREE)
3622 itype = build_nonstandard_integer_type (cmp_mode_size,
3623 TYPE_UNSIGNED (type));
3625 if (itype == NULL_TREE
3626 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3627 return NULL;
3629 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3630 if (vecitype == NULL_TREE)
3631 return NULL;
3633 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3634 return NULL;
3636 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3638 if ((TREE_CODE (then_clause) == INTEGER_CST
3639 && !int_fits_type_p (then_clause, itype))
3640 || (TREE_CODE (else_clause) == INTEGER_CST
3641 && !int_fits_type_p (else_clause, itype)))
3642 return NULL;
3645 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3646 COND_EXPR, unshare_expr (cond_expr),
3647 fold_convert (itype, then_clause),
3648 fold_convert (itype, else_clause));
3649 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3650 NOP_EXPR, gimple_assign_lhs (def_stmt));
3652 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3653 *type_out = vectype;
3655 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3657 return pattern_stmt;
3661 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3662 true if bool VAR can and should be optimized that way. Assume it shouldn't
3663 in case it's a result of a comparison which can be directly vectorized into
3664 a vector comparison. Fills in STMTS with all stmts visited during the
3665 walk. */
3667 static bool
3668 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3670 tree rhs1;
3671 enum tree_code rhs_code;
3673 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3674 if (!def_stmt_info)
3675 return false;
3677 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3678 if (!def_stmt)
3679 return false;
3681 if (stmts.contains (def_stmt))
3682 return true;
3684 rhs1 = gimple_assign_rhs1 (def_stmt);
3685 rhs_code = gimple_assign_rhs_code (def_stmt);
3686 switch (rhs_code)
3688 case SSA_NAME:
3689 if (! check_bool_pattern (rhs1, vinfo, stmts))
3690 return false;
3691 break;
3693 CASE_CONVERT:
3694 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3695 return false;
3696 if (! check_bool_pattern (rhs1, vinfo, stmts))
3697 return false;
3698 break;
3700 case BIT_NOT_EXPR:
3701 if (! check_bool_pattern (rhs1, vinfo, stmts))
3702 return false;
3703 break;
3705 case BIT_AND_EXPR:
3706 case BIT_IOR_EXPR:
3707 case BIT_XOR_EXPR:
3708 if (! check_bool_pattern (rhs1, vinfo, stmts)
3709 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3710 return false;
3711 break;
3713 default:
3714 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3716 tree vecitype, comp_vectype;
3718 /* If the comparison can throw, then is_gimple_condexpr will be
3719 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3720 if (stmt_could_throw_p (cfun, def_stmt))
3721 return false;
3723 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3724 if (comp_vectype == NULL_TREE)
3725 return false;
3727 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3728 TREE_TYPE (rhs1));
3729 if (mask_type
3730 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3731 return false;
3733 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3735 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3736 tree itype
3737 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3738 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3739 if (vecitype == NULL_TREE)
3740 return false;
3742 else
3743 vecitype = comp_vectype;
3744 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3745 return false;
3747 else
3748 return false;
3749 break;
3752 bool res = stmts.add (def_stmt);
3753 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3754 gcc_assert (!res);
3756 return true;
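/* Illustrative note, added for exposition (example assumed): for
   bool b = (x > y) & flag, the walk above records the BIT_AND_EXPR and
   the comparison, but only if the comparison cannot be vectorized
   directly as a mask compare; when expand_vec_cmp_expr_p succeeds for
   it, the helper returns false and vect_recog_bool_pattern falls back
   to the mask-based COND_EXPR path instead.  */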
3760 /* Helper function of adjust_bool_pattern. Cast VAR (defined by
3761 SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast statement to
3762 STMT_INFO's pattern definition sequence, and return the cast's lhs. */
3764 static tree
3765 adjust_bool_pattern_cast (vec_info *vinfo,
3766 tree type, tree var, stmt_vec_info stmt_info)
3768 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3769 NOP_EXPR, var);
3770 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
3771 get_vectype_for_scalar_type (vinfo, type));
3772 return gimple_assign_lhs (cast_stmt);
3775 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3776 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3777 type, OUT_TYPE is the desired final integer type of the whole pattern.
3778 STMT_INFO is the info of the pattern root; it is the statement that
3779 pattern stmts should be associated with. DEFS is a map of pattern defs. */
3781 static void
3782 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
3783 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3785 gimple *stmt = SSA_NAME_DEF_STMT (var);
3786 enum tree_code rhs_code, def_rhs_code;
3787 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3788 location_t loc;
3789 gimple *pattern_stmt, *def_stmt;
3790 tree trueval = NULL_TREE;
3792 rhs1 = gimple_assign_rhs1 (stmt);
3793 rhs2 = gimple_assign_rhs2 (stmt);
3794 rhs_code = gimple_assign_rhs_code (stmt);
3795 loc = gimple_location (stmt);
3796 switch (rhs_code)
3798 case SSA_NAME:
3799 CASE_CONVERT:
3800 irhs1 = *defs.get (rhs1);
3801 itype = TREE_TYPE (irhs1);
3802 pattern_stmt
3803 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3804 SSA_NAME, irhs1);
3805 break;
3807 case BIT_NOT_EXPR:
3808 irhs1 = *defs.get (rhs1);
3809 itype = TREE_TYPE (irhs1);
3810 pattern_stmt
3811 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3812 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3813 break;
3815 case BIT_AND_EXPR:
3816 /* Try to optimize x = y & (a < b ? 1 : 0); into
3817 x = (a < b ? y : 0);
3819 E.g. for:
3820 bool a_b, b_b, c_b;
3821 TYPE d_T;
3823 S1 a_b = x1 CMP1 y1;
3824 S2 b_b = x2 CMP2 y2;
3825 S3 c_b = a_b & b_b;
3826 S4 d_T = (TYPE) c_b;
3828 we would normally emit:
3830 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3831 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3832 S3' c_T = a_T & b_T;
3833 S4' d_T = c_T;
3835 but we can save one stmt by using the
3836 result of one of the COND_EXPRs in the other COND_EXPR and leave
3837 BIT_AND_EXPR stmt out:
3839 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3840 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3841 S4' d_T = c_T;
3843 At least when VEC_COND_EXPR is implemented using masks,
3844 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3845 computes the comparison mask and ANDs it, in one case with an
3846 all-ones vector, in the other case with a vector register.
3847 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3848 often more expensive. */
3849 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3850 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3851 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3853 irhs1 = *defs.get (rhs1);
3854 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3855 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3856 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3858 rhs_code = def_rhs_code;
3859 rhs1 = def_rhs1;
3860 rhs2 = gimple_assign_rhs2 (def_stmt);
3861 trueval = irhs1;
3862 goto do_compare;
3864 else
3865 irhs2 = *defs.get (rhs2);
3866 goto and_ior_xor;
3868 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3869 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3870 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3872 irhs2 = *defs.get (rhs2);
3873 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3874 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3875 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3877 rhs_code = def_rhs_code;
3878 rhs1 = def_rhs1;
3879 rhs2 = gimple_assign_rhs2 (def_stmt);
3880 trueval = irhs2;
3881 goto do_compare;
3883 else
3884 irhs1 = *defs.get (rhs1);
3885 goto and_ior_xor;
3887 /* FALLTHRU */
3888 case BIT_IOR_EXPR:
3889 case BIT_XOR_EXPR:
3890 irhs1 = *defs.get (rhs1);
3891 irhs2 = *defs.get (rhs2);
3892 and_ior_xor:
3893 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3894 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3896 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3897 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3898 int out_prec = TYPE_PRECISION (out_type);
3899 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3900 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
3901 stmt_info);
3902 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3903 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
3904 stmt_info);
3905 else
3907 irhs1 = adjust_bool_pattern_cast (vinfo,
3908 out_type, irhs1, stmt_info);
3909 irhs2 = adjust_bool_pattern_cast (vinfo,
3910 out_type, irhs2, stmt_info);
3913 itype = TREE_TYPE (irhs1);
3914 pattern_stmt
3915 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3916 rhs_code, irhs1, irhs2);
3917 break;
3919 default:
3920 do_compare:
3921 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3922 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3923 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3924 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3925 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3927 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3928 itype
3929 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3931 else
3932 itype = TREE_TYPE (rhs1);
3933 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3934 if (trueval == NULL_TREE)
3935 trueval = build_int_cst (itype, 1);
3936 else
3937 gcc_checking_assert (useless_type_conversion_p (itype,
3938 TREE_TYPE (trueval)));
3939 pattern_stmt
3940 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3941 COND_EXPR, cond_expr, trueval,
3942 build_int_cst (itype, 0));
3943 break;
3946 gimple_set_location (pattern_stmt, loc);
3947 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
3948 get_vectype_for_scalar_type (vinfo, itype));
3949 defs.put (var, gimple_assign_lhs (pattern_stmt));
3952 /* Comparison function to qsort a vector of gimple stmts after UID. */
3954 static int
3955 sort_after_uid (const void *p1, const void *p2)
3957 const gimple *stmt1 = *(const gimple * const *)p1;
3958 const gimple *stmt2 = *(const gimple * const *)p2;
3959 return gimple_uid (stmt1) - gimple_uid (stmt2);
3962 /* Create pattern stmts for all stmts participating in the bool pattern
3963 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3964 OUT_TYPE. Return the def of the pattern root. */
3966 static tree
3967 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
3968 tree out_type, stmt_vec_info stmt_info)
3970 /* Gather original stmts in the bool pattern in their order of appearance
3971 in the IL. */
3972 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3973 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3974 i != bool_stmt_set.end (); ++i)
3975 bool_stmts.quick_push (*i);
3976 bool_stmts.qsort (sort_after_uid);
3978 /* Now process them in that order, producing pattern stmts. */
3979 hash_map <tree, tree> defs;
3980 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3981 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
3982 out_type, stmt_info, defs);
3984 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3985 gimple *pattern_stmt
3986 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3987 return gimple_assign_lhs (pattern_stmt);
3990 /* Return the proper type for converting bool VAR into
3991 an integer value or NULL_TREE if no such type exists.
3992 The type is chosen so that the converted value has the
3993 same number of elements as VAR's vector type. */
3995 static tree
3996 integer_type_for_mask (tree var, vec_info *vinfo)
3998 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3999 return NULL_TREE;
4001 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4002 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4003 return NULL_TREE;
4005 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4008 /* Function vect_recog_bool_pattern
4010 Try to find pattern like following:
4012 bool a_b, b_b, c_b, d_b, e_b;
4013 TYPE f_T;
4014 loop:
4015 S1 a_b = x1 CMP1 y1;
4016 S2 b_b = x2 CMP2 y2;
4017 S3 c_b = a_b & b_b;
4018 S4 d_b = x3 CMP3 y3;
4019 S5 e_b = c_b | d_b;
4020 S6 f_T = (TYPE) e_b;
4022 where type 'TYPE' is an integral type. Or a similar pattern
4023 ending in
4025 S6 f_Y = e_b ? r_Y : s_Y;
4027 as results from if-conversion of a complex condition.
4029 Input:
4031 * STMT_VINFO: The stmt at the end from which the pattern
4032 search begins, i.e. cast of a bool to
4033 an integer type.
4035 Output:
4037 * TYPE_OUT: The type of the output of this pattern.
4039 * Return value: A new stmt that will be used to replace the pattern.
4041 Assuming the size of TYPE is the same as the size of all comparisons
4042 (otherwise some casts would be added where needed), for the above
4043 sequence we create the related pattern stmts:
4044 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4045 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4046 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4047 S5' e_T = c_T | d_T;
4048 S6' f_T = e_T;
4050 Instead of the above S3' we could emit:
4051 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4052 S3' c_T = a_T | b_T;
4053 but the above is more efficient. */
4055 static gimple *
4056 vect_recog_bool_pattern (vec_info *vinfo,
4057 stmt_vec_info stmt_vinfo, tree *type_out)
4059 gimple *last_stmt = stmt_vinfo->stmt;
4060 enum tree_code rhs_code;
4061 tree var, lhs, rhs, vectype;
4062 gimple *pattern_stmt;
4064 if (!is_gimple_assign (last_stmt))
4065 return NULL;
4067 var = gimple_assign_rhs1 (last_stmt);
4068 lhs = gimple_assign_lhs (last_stmt);
4069 rhs_code = gimple_assign_rhs_code (last_stmt);
4071 if (rhs_code == VIEW_CONVERT_EXPR)
4072 var = TREE_OPERAND (var, 0);
4074 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4075 return NULL;
4077 hash_set<gimple *> bool_stmts;
4079 if (CONVERT_EXPR_CODE_P (rhs_code)
4080 || rhs_code == VIEW_CONVERT_EXPR)
4082 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4083 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4084 return NULL;
4085 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4086 if (vectype == NULL_TREE)
4087 return NULL;
4089 if (check_bool_pattern (var, vinfo, bool_stmts))
4091 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4092 TREE_TYPE (lhs), stmt_vinfo);
4093 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4094 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4095 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4096 else
4097 pattern_stmt
4098 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4100 else
4102 tree type = integer_type_for_mask (var, vinfo);
4103 tree cst0, cst1, tmp;
4105 if (!type)
4106 return NULL;
4108 /* We may use the cond directly with a narrowed type, avoiding
4109 multiple cond exprs and the subsequent result packing, and
4110 perform a single cond with a packed mask instead. In case
4111 of widening it is better to do the cond first and then extract
4112 the results. */
4113 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4114 type = TREE_TYPE (lhs);
4116 cst0 = build_int_cst (type, 0);
4117 cst1 = build_int_cst (type, 1);
4118 tmp = vect_recog_temp_ssa_var (type, NULL);
4119 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4121 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4123 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4124 append_pattern_def_seq (vinfo, stmt_vinfo,
4125 pattern_stmt, new_vectype);
4127 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4128 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4132 *type_out = vectype;
4133 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4135 return pattern_stmt;
4137 else if (rhs_code == COND_EXPR
4138 && TREE_CODE (var) == SSA_NAME)
4140 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4141 if (vectype == NULL_TREE)
4142 return NULL;
4144 /* Build a scalar type for the boolean result that when
4145 vectorized matches the vector type of the result in
4146 size and number of elements. */
4147 unsigned prec
4148 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4149 TYPE_VECTOR_SUBPARTS (vectype));
4151 tree type
4152 = build_nonstandard_integer_type (prec,
4153 TYPE_UNSIGNED (TREE_TYPE (var)));
4154 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4155 return NULL;
4157 if (!check_bool_pattern (var, vinfo, bool_stmts))
4158 return NULL;
4160 rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4162 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4163 pattern_stmt
4164 = gimple_build_assign (lhs, COND_EXPR,
4165 build2 (NE_EXPR, boolean_type_node,
4166 rhs, build_int_cst (type, 0)),
4167 gimple_assign_rhs2 (last_stmt),
4168 gimple_assign_rhs3 (last_stmt));
4169 *type_out = vectype;
4170 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4172 return pattern_stmt;
4174 else if (rhs_code == SSA_NAME
4175 && STMT_VINFO_DATA_REF (stmt_vinfo))
4177 stmt_vec_info pattern_stmt_info;
4178 tree nunits_vectype;
4179 if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4180 &nunits_vectype)
4181 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4182 return NULL;
4184 if (check_bool_pattern (var, vinfo, bool_stmts))
4185 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4186 TREE_TYPE (vectype), stmt_vinfo);
4187 else
4189 tree type = integer_type_for_mask (var, vinfo);
4190 tree cst0, cst1, new_vectype;
4192 if (!type)
4193 return NULL;
4195 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4196 type = TREE_TYPE (vectype);
4198 cst0 = build_int_cst (type, 0);
4199 cst1 = build_int_cst (type, 1);
4200 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4202 rhs = vect_recog_temp_ssa_var (type, NULL);
4203 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4204 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4207 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4208 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4210 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4211 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4212 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4213 rhs = rhs2;
4215 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4216 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4217 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4218 *type_out = vectype;
4219 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4221 return pattern_stmt;
4223 else
4224 return NULL;
4228 /* A helper for vect_recog_mask_conversion_pattern. Build
4229 conversion of MASK to a type suitable for masking VECTYPE.
4230 The built statement is given the required vectype and is appended
4231 to STMT_VINFO's pattern definition sequence.
4233 Return the converted mask. */
4235 static tree
4236 build_mask_conversion (vec_info *vinfo,
4237 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4239 gimple *stmt;
4240 tree masktype, tmp;
4242 masktype = truth_type_for (vectype);
4243 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4244 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4245 append_pattern_def_seq (vinfo, stmt_vinfo,
4246 stmt, masktype, TREE_TYPE (vectype));
4248 return tmp;
4252 /* Function vect_recog_mask_conversion_pattern
4254 Try to find statements which require boolean type
4255 conversion. Additional conversion statements are
4256 added to handle such cases. For example:
4258 bool m_1, m_2, m_3;
4259 int i_4, i_5;
4260 double d_6, d_7;
4261 char c_1, c_2, c_3;
4263 S1 m_1 = i_4 > i_5;
4264 S2 m_2 = d_6 < d_7;
4265 S3 m_3 = m_1 & m_2;
4266 S4 c_1 = m_3 ? c_2 : c_3;
4268 Will be transformed into:
4270 S1 m_1 = i_4 > i_5;
4271 S2 m_2 = d_6 < d_7;
4272 S3'' m_2' = (_Bool[bitsize=32])m_2
4273 S3' m_3' = m_1 & m_2';
4274 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4275 S4' c_1' = m_3'' ? c_2 : c_3; */
4277 static gimple *
4278 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4279 stmt_vec_info stmt_vinfo, tree *type_out)
4281 gimple *last_stmt = stmt_vinfo->stmt;
4282 enum tree_code rhs_code;
4283 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4284 tree vectype1, vectype2;
4285 stmt_vec_info pattern_stmt_info;
4286 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4287 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4289 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4290 if (is_gimple_call (last_stmt)
4291 && gimple_call_internal_p (last_stmt))
4293 gcall *pattern_stmt;
4295 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4296 int mask_argno = internal_fn_mask_index (ifn);
4297 if (mask_argno < 0)
4298 return NULL;
4300 bool store_p = internal_store_fn_p (ifn);
4301 if (store_p)
4303 int rhs_index = internal_fn_stored_value_index (ifn);
4304 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4305 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4307 else
4309 lhs = gimple_call_lhs (last_stmt);
4310 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4313 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4314 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4315 if (!mask_arg_type)
4316 return NULL;
4317 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4319 if (!vectype1 || !vectype2
4320 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4321 TYPE_VECTOR_SUBPARTS (vectype2)))
4322 return NULL;
4324 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4326 auto_vec<tree, 8> args;
4327 unsigned int nargs = gimple_call_num_args (last_stmt);
4328 args.safe_grow (nargs, true);
4329 for (unsigned int i = 0; i < nargs; ++i)
4330 args[i] = ((int) i == mask_argno
4331 ? tmp
4332 : gimple_call_arg (last_stmt, i));
4333 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4335 if (!store_p)
4337 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4338 gimple_call_set_lhs (pattern_stmt, lhs);
4340 gimple_call_set_nothrow (pattern_stmt, true);
4342 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4343 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4344 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4346 *type_out = vectype1;
4347 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4349 return pattern_stmt;
4352 if (!is_gimple_assign (last_stmt))
4353 return NULL;
4355 gimple *pattern_stmt;
4356 lhs = gimple_assign_lhs (last_stmt);
4357 rhs1 = gimple_assign_rhs1 (last_stmt);
4358 rhs_code = gimple_assign_rhs_code (last_stmt);
4360 /* Check for cond expression requiring mask conversion. */
4361 if (rhs_code == COND_EXPR)
4363 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4365 if (TREE_CODE (rhs1) == SSA_NAME)
4367 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4368 if (!rhs1_type)
4369 return NULL;
4371 else if (COMPARISON_CLASS_P (rhs1))
4373 /* Check whether we're comparing scalar booleans and (if so)
4374 whether a better mask type exists than the mask associated
4375 with boolean-sized elements. This avoids unnecessary packs
4376 and unpacks if the booleans are set from comparisons of
4377 wider types. E.g. in:
4379 int x1, x2, x3, x4, y1, y2;
4381 bool b1 = (x1 == x2);
4382 bool b2 = (x3 == x4);
4383 ... = b1 == b2 ? y1 : y2;
4385 it is better for b1 and b2 to use the mask type associated
4386 with int elements rather than bool (byte) elements. */
4387 rhs1_op0 = TREE_OPERAND (rhs1, 0);
4388 rhs1_op1 = TREE_OPERAND (rhs1, 1);
4389 if (!rhs1_op0 || !rhs1_op1)
4390 return NULL;
4391 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4392 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4394 if (!rhs1_op0_type)
4395 rhs1_type = TREE_TYPE (rhs1_op0);
4396 else if (!rhs1_op1_type)
4397 rhs1_type = TREE_TYPE (rhs1_op1);
4398 else if (TYPE_PRECISION (rhs1_op0_type)
4399 != TYPE_PRECISION (rhs1_op1_type))
4401 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4402 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4403 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4404 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4405 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4407 if (abs (tmp0) > abs (tmp1))
4408 rhs1_type = rhs1_op1_type;
4409 else
4410 rhs1_type = rhs1_op0_type;
4412 else
4413 rhs1_type = build_nonstandard_integer_type
4414 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4416 else
4417 rhs1_type = rhs1_op0_type;
4419 else
4420 return NULL;
4422 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4424 if (!vectype1 || !vectype2)
4425 return NULL;
4427 /* Continue if a conversion is needed. Also continue if we have
4428 a comparison whose vector type would normally be different from
4429 VECTYPE2 when considered in isolation. In that case we'll
4430 replace the comparison with an SSA name (so that we can record
4431 its vector type) and behave as though the comparison was an SSA
4432 name from the outset. */
4433 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4434 TYPE_VECTOR_SUBPARTS (vectype2))
4435 && !rhs1_op0_type
4436 && !rhs1_op1_type)
4437 return NULL;
4439 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4440 in place; we can handle it in vectorizable_condition. This avoids
4441 unnecessary promotion stmts and an increased vectorization factor. */
4442 if (COMPARISON_CLASS_P (rhs1)
4443 && INTEGRAL_TYPE_P (rhs1_type)
4444 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4445 TYPE_VECTOR_SUBPARTS (vectype2)))
4447 enum vect_def_type dt;
4448 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4449 && dt == vect_external_def
4450 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4451 && (dt == vect_external_def
4452 || dt == vect_constant_def))
4454 tree wide_scalar_type = build_nonstandard_integer_type
4455 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4456 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4457 wide_scalar_type);
4458 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4459 return NULL;
4463 /* If rhs1 is a comparison we need to move it into a
4464 separate statement. */
4465 if (TREE_CODE (rhs1) != SSA_NAME)
4467 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4468 if (rhs1_op0_type
4469 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4470 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4471 vectype2, stmt_vinfo);
4472 if (rhs1_op1_type
4473 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4474 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4475 vectype2, stmt_vinfo);
4476 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4477 rhs1_op0, rhs1_op1);
4478 rhs1 = tmp;
4479 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4480 rhs1_type);
4483 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4484 TYPE_VECTOR_SUBPARTS (vectype2)))
4485 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4486 else
4487 tmp = rhs1;
4489 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4490 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4491 gimple_assign_rhs2 (last_stmt),
4492 gimple_assign_rhs3 (last_stmt));
4494 *type_out = vectype1;
4495 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4497 return pattern_stmt;
4500 /* Now check for binary boolean operations requiring conversion for
4501 one of the operands. */
4502 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4503 return NULL;
4505 if (rhs_code != BIT_IOR_EXPR
4506 && rhs_code != BIT_XOR_EXPR
4507 && rhs_code != BIT_AND_EXPR
4508 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4509 return NULL;
4511 rhs2 = gimple_assign_rhs2 (last_stmt);
4513 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4514 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4516 if (!rhs1_type || !rhs2_type
4517 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4518 return NULL;
4520 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4522 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4523 if (!vectype1)
4524 return NULL;
4525 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4527 else
4529 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4530 if (!vectype1)
4531 return NULL;
4532 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4535 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4536 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4538 *type_out = vectype1;
4539 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4541 return pattern_stmt;
4544 /* STMT_INFO is a load or store. If the load or store is conditional, return
4545 the boolean condition under which it occurs, otherwise return null. */
4547 static tree
4548 vect_get_load_store_mask (stmt_vec_info stmt_info)
4550 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4552 gcc_assert (gimple_assign_single_p (def_assign));
4553 return NULL_TREE;
4556 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4558 internal_fn ifn = gimple_call_internal_fn (def_call);
4559 int mask_index = internal_fn_mask_index (ifn);
4560 return gimple_call_arg (def_call, mask_index);
4563 gcc_unreachable ();
4566 /* Return MASK if MASK is suitable for masking an operation on vectors
4567 of type VECTYPE, otherwise convert it into such a form and return
4568 the result. Associate any conversion statements with STMT_INFO's
4569 pattern. */
4571 static tree
4572 vect_convert_mask_for_vectype (tree mask, tree vectype,
4573 stmt_vec_info stmt_info, vec_info *vinfo)
4575 tree mask_type = integer_type_for_mask (mask, vinfo);
4576 if (mask_type)
4578 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4579 if (mask_vectype
4580 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4581 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4582 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4584 return mask;
4587 /* Return the equivalent of:
4589 fold_convert (TYPE, VALUE)
4591 with the expectation that the operation will be vectorized.
4592 If new statements are needed, add them as pattern statements
4593 to STMT_INFO. */
4595 static tree
4596 vect_add_conversion_to_pattern (vec_info *vinfo,
4597 tree type, tree value, stmt_vec_info stmt_info)
4599 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4600 return value;
4602 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4603 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4604 append_pattern_def_seq (vinfo, stmt_info, conversion,
4605 get_vectype_for_scalar_type (vinfo, type));
4606 return new_value;
4609 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4610 internal function. Return the final statement on success and set
4611 *TYPE_OUT to the vector type being loaded or stored.
4613 This function only handles gathers and scatters that were recognized
4614 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4616 static gimple *
4617 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4618 stmt_vec_info stmt_info, tree *type_out)
4620 /* Currently we only support this for loop vectorization. */
4621 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4622 if (!loop_vinfo)
4623 return NULL;
4625 /* Make sure that we're looking at a gather load or scatter store. */
4626 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4627 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4628 return NULL;
4630 /* Get the boolean that controls whether the load or store happens.
4631 This is null if the operation is unconditional. */
4632 tree mask = vect_get_load_store_mask (stmt_info);
4634 /* Make sure that the target supports an appropriate internal
4635 function for the gather/scatter operation. */
4636 gather_scatter_info gs_info;
4637 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4638 || gs_info.decl)
4639 return NULL;
4641 /* Convert the mask to the right form. */
4642 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4643 gs_info.element_type);
4644 if (mask)
4645 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4646 loop_vinfo);
4648 /* Get the invariant base and non-invariant offset, converting the
4649 latter to the same width as the vector elements. */
4650 tree base = gs_info.base;
4651 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4652 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4653 gs_info.offset, stmt_info);
4655 /* Build the new pattern statement. */
4656 tree scale = size_int (gs_info.scale);
4657 gcall *pattern_stmt;
4658 if (DR_IS_READ (dr))
4660 tree zero = build_zero_cst (gs_info.element_type);
4661 if (mask != NULL)
4662 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4663 offset, scale, zero, mask);
4664 else
4665 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4666 offset, scale, zero);
4667 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4668 gimple_call_set_lhs (pattern_stmt, load_lhs);
4670 else
4672 tree rhs = vect_get_store_rhs (stmt_info);
4673 if (mask != NULL)
4674 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4675 base, offset, scale, rhs,
4676 mask);
4677 else
4678 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4679 base, offset, scale, rhs);
4681 gimple_call_set_nothrow (pattern_stmt, true);
4683 /* Copy across relevant vectorization info and associate DR with the
4684 new pattern statement instead of the original statement. */
4685 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4686 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4688 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4689 *type_out = vectype;
4690 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4692 return pattern_stmt;
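/* Illustrative sketch, added for exposition (loop and names assumed,
   not from the sources): a conditional gather such as

     for (i = 0; i < n; i++)
       if (c[i])
         out[i] = base[idx[i]];

   whose load was if-converted to a .MASK_LOAD would be replaced here by
   an internal-function call of roughly the form

     _1 = .MASK_GATHER_LOAD (base, offset, scale, 0, mask);

   with the mask converted by vect_convert_mask_for_vectype and the
   offset widened by vect_add_conversion_to_pattern beforehand.  */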
4695 /* Return true if TYPE is a non-boolean integer type. These are the types
4696 that we want to consider for narrowing. */
4698 static bool
4699 vect_narrowable_type_p (tree type)
4701 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4704 /* Return true if the operation given by CODE can be truncated to N bits
4705 when only N bits of the output are needed. This is only true if bit N+1
4706 of the inputs has no effect on the low N bits of the result. */
4708 static bool
4709 vect_truncatable_operation_p (tree_code code)
4711 switch (code)
4713 case PLUS_EXPR:
4714 case MINUS_EXPR:
4715 case MULT_EXPR:
4716 case BIT_AND_EXPR:
4717 case BIT_IOR_EXPR:
4718 case BIT_XOR_EXPR:
4719 case COND_EXPR:
4720 return true;
4722 default:
4723 return false;
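/* Worked example, added for exposition: PLUS_EXPR qualifies because
   ((a + b) & 0xff) == (((a & 0xff) + (b & 0xff)) & 0xff) -- bits above
   bit 7 of the inputs cannot change the low 8 bits of the sum.  A right
   shift does not qualify: bit 8 of a feeds bit 7 of a >> 1, so
   RSHIFT_EXPR is deliberately absent from the list above.  */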
4727 /* Record that STMT_INFO could be changed from operating on TYPE to
4728 operating on a type with the precision and sign given by PRECISION
4729 and SIGN respectively. PRECISION is an arbitrary bit precision;
4730 it might not be a whole number of bytes. */
4732 static void
4733 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4734 unsigned int precision, signop sign)
4736 /* Round the precision up to a whole number of bytes. */
4737 precision = vect_element_precision (precision);
4738 if (precision < TYPE_PRECISION (type)
4739 && (!stmt_info->operation_precision
4740 || stmt_info->operation_precision > precision))
4742 stmt_info->operation_precision = precision;
4743 stmt_info->operation_sign = sign;
4747 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4748 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4749 is an arbitrary bit precision; it might not be a whole number of bytes. */
4751 static void
4752 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4753 unsigned int min_input_precision)
4755 /* This operation in isolation only requires the inputs to have
4756 MIN_INPUT_PRECISION of precision. However, that doesn't mean
4757 that MIN_INPUT_PRECISION is a natural precision for the chain
4758 as a whole. E.g. consider something like:
4760 unsigned short *x, *y;
4761 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4763 The right shift can be done on unsigned chars, and only requires the
4764 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4765 approach would mean turning a natural chain of single-vector unsigned
4766 short operations into one that truncates "*x" and then extends
4767 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4768 operation and one vector for each unsigned char operation.
4769 This would be a significant pessimization.
4771 Instead only propagate the maximum of this precision and the precision
4772 required by the users of the result. This means that we don't pessimize
4773 the case above but continue to optimize things like:
4775 unsigned char *y;
4776 unsigned short *x;
4777 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4779 Here we would truncate two vectors of *x to a single vector of
4780 unsigned chars and use single-vector unsigned char operations for
4781 everything else, rather than doing two unsigned short copies of
4782 "(*x & 0xf0) >> 4" and then truncating the result. */
4783 min_input_precision = MAX (min_input_precision,
4784 stmt_info->min_output_precision);
4786 if (min_input_precision < TYPE_PRECISION (type)
4787 && (!stmt_info->min_input_precision
4788 || stmt_info->min_input_precision > min_input_precision))
4789 stmt_info->min_input_precision = min_input_precision;
4792 /* Subroutine of vect_determine_min_output_precision. Return true if
4793 we can calculate a reduced number of output bits for STMT_INFO,
4794 whose result is LHS. */
4796 static bool
4797 vect_determine_min_output_precision_1 (vec_info *vinfo,
4798 stmt_vec_info stmt_info, tree lhs)
4800 /* Take the maximum precision required by users of the result. */
4801 unsigned int precision = 0;
4802 imm_use_iterator iter;
4803 use_operand_p use;
4804 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4806 gimple *use_stmt = USE_STMT (use);
4807 if (is_gimple_debug (use_stmt))
4808 continue;
4809 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4810 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4811 return false;
4812 /* The input precision recorded for COND_EXPRs applies only to the
4813 "then" and "else" values. */
4814 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4815 if (assign
4816 && gimple_assign_rhs_code (assign) == COND_EXPR
4817 && use->use != gimple_assign_rhs2_ptr (assign)
4818 && use->use != gimple_assign_rhs3_ptr (assign))
4819 return false;
4820 precision = MAX (precision, use_stmt_info->min_input_precision);
4823 if (dump_enabled_p ())
4824 dump_printf_loc (MSG_NOTE, vect_location,
4825 "only the low %d bits of %T are significant\n",
4826 precision, lhs);
4827 stmt_info->min_output_precision = precision;
4828 return true;
4831 /* Calculate min_output_precision for STMT_INFO. */
4833 static void
4834 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
4836 /* We're only interested in statements with a narrowable result. */
4837 tree lhs = gimple_get_lhs (stmt_info->stmt);
4838 if (!lhs
4839 || TREE_CODE (lhs) != SSA_NAME
4840 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4841 return;
4843 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
4844 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4847 /* Use range information to decide whether STMT (described by STMT_INFO)
4848 could be done in a narrower type. This is effectively a forward
4849 propagation, since it uses context-independent information that applies
4850 to all users of an SSA name. */
4852 static void
4853 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4855 tree lhs = gimple_assign_lhs (stmt);
4856 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4857 return;
4859 tree type = TREE_TYPE (lhs);
4860 if (!vect_narrowable_type_p (type))
4861 return;
4863 /* First see whether we have any useful range information for the result. */
4864 unsigned int precision = TYPE_PRECISION (type);
4865 signop sign = TYPE_SIGN (type);
4866 wide_int min_value, max_value;
4867 if (!vect_get_range_info (lhs, &min_value, &max_value))
4868 return;
4870 tree_code code = gimple_assign_rhs_code (stmt);
4871 unsigned int nops = gimple_num_ops (stmt);
4873 if (!vect_truncatable_operation_p (code))
4874 /* Check that all relevant input operands are compatible, and update
4875 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4876 for (unsigned int i = 1; i < nops; ++i)
4878 tree op = gimple_op (stmt, i);
4879 if (TREE_CODE (op) == INTEGER_CST)
4881 /* Don't require the integer to have TYPE (which it might
4882 not for things like shift amounts, etc.), but do require it
4883 to fit the type. */
4884 if (!int_fits_type_p (op, type))
4885 return;
4887 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4888 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4890 else if (TREE_CODE (op) == SSA_NAME)
4892 /* Ignore codes that don't take uniform arguments. */
4893 if (!types_compatible_p (TREE_TYPE (op), type))
4894 return;
4896 wide_int op_min_value, op_max_value;
4897 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4898 return;
4900 min_value = wi::min (min_value, op_min_value, sign);
4901 max_value = wi::max (max_value, op_max_value, sign);
4903 else
4904 return;
4907 /* Try to switch signed types for unsigned types if we can.
4908 This is better for two reasons. First, unsigned ops tend
4909 to be cheaper than signed ops. Second, it means that we can
4910 handle things like:
4912 signed char c;
4913 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4915 as:
4917 signed char c;
4918 unsigned short res_1 = (unsigned short) c & 0xff00;
4919 int res = (int) res_1;
4921 where the intermediate result res_1 has unsigned rather than
4922 signed type. */
4923 if (sign == SIGNED && !wi::neg_p (min_value))
4924 sign = UNSIGNED;
4926 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4927 unsigned int precision1 = wi::min_precision (min_value, sign);
4928 unsigned int precision2 = wi::min_precision (max_value, sign);
4929 unsigned int value_precision = MAX (precision1, precision2);
4930 if (value_precision >= precision)
4931 return;
4933 if (dump_enabled_p ())
4934 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4935 " without loss of precision: %G",
4936 sign == SIGNED ? "signed" : "unsigned",
4937 value_precision, stmt);
4939 vect_set_operation_type (stmt_info, type, value_precision, sign);
4940 vect_set_min_input_precision (stmt_info, type, value_precision);
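/* Worked example, added for exposition (range assumed to come from VRP):
   for int res = x & 0xff with a known range of [0, 255], SIGN flips to
   UNSIGNED because the minimum is non-negative, wi::min_precision of
   both bounds is at most 8, so value_precision = 8 < 32 and the stmt is
   recorded as narrowable to an unsigned 8-bit operation (rounded to a
   whole byte by vect_element_precision in vect_set_operation_type).  */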
4943 /* Use information about the users of STMT's result to decide whether
4944 STMT (described by STMT_INFO) could be done in a narrower type.
4945 This is effectively a backward propagation. */
4947 static void
4948 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4950 tree_code code = gimple_assign_rhs_code (stmt);
4951 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4952 tree type = TREE_TYPE (gimple_op (stmt, opno));
4953 if (!vect_narrowable_type_p (type))
4954 return;
4956 unsigned int precision = TYPE_PRECISION (type);
4957 unsigned int operation_precision, min_input_precision;
4958 switch (code)
4960 CASE_CONVERT:
4961 /* Only the bits that contribute to the output matter. Don't change
4962 the precision of the operation itself. */
4963 operation_precision = precision;
4964 min_input_precision = stmt_info->min_output_precision;
4965 break;
4967 case LSHIFT_EXPR:
4968 case RSHIFT_EXPR:
4970 tree shift = gimple_assign_rhs2 (stmt);
4971 if (TREE_CODE (shift) != INTEGER_CST
4972 || !wi::ltu_p (wi::to_widest (shift), precision))
4973 return;
4974 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4975 if (code == LSHIFT_EXPR)
4977 /* Avoid creating an undefined shift.
4979 ??? We could instead use min_output_precision as-is and
4980 optimize out-of-range shifts to zero. However, only
4981 degenerate testcases shift away all their useful input data,
4982 and it isn't natural to drop input operations in the middle
4983 of vectorization. This sort of thing should really be
4984 handled before vectorization. */
4985 operation_precision = MAX (stmt_info->min_output_precision,
4986 const_shift + 1);
4987 /* We need CONST_SHIFT fewer bits of the input. */
4988 min_input_precision = (MAX (operation_precision, const_shift)
4989 - const_shift);
4991 else
4993 /* We need CONST_SHIFT extra bits to do the operation. */
4994 operation_precision = (stmt_info->min_output_precision
4995 + const_shift);
4996 min_input_precision = operation_precision;
4998 break;
5001 default:
5002 if (vect_truncatable_operation_p (code))
5004 /* Input bit N has no effect on output bits N-1 and lower. */
5005 operation_precision = stmt_info->min_output_precision;
5006 min_input_precision = operation_precision;
5007 break;
5009 return;
5012 if (operation_precision < precision)
5014 if (dump_enabled_p ())
5015 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5016 " without affecting users: %G",
5017 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5018 operation_precision, stmt);
5019 vect_set_operation_type (stmt_info, type, operation_precision,
5020 TYPE_SIGN (type));
5022 vect_set_min_input_precision (stmt_info, type, min_input_precision);
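/* Worked numeric example, added for exposition: for y = x << 3 where
   only the low 8 bits of y are needed (min_output_precision == 8),
   operation_precision = MAX (8, 3 + 1) = 8 and min_input_precision
   = MAX (8, 3) - 3 = 5.  For y = x >> 3 under the same assumption,
   operation_precision = 8 + 3 = 11 and min_input_precision = 11;
   vect_set_operation_type later rounds the operation precision up to
   a whole number of bytes.  */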
5025 /* Return true if the statement described by STMT_INFO sets a boolean
5026 SSA_NAME and if we know how to vectorize this kind of statement using
5027 vector mask types. */
5029 static bool
5030 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5032 tree lhs = gimple_get_lhs (stmt_info->stmt);
5033 if (!lhs
5034 || TREE_CODE (lhs) != SSA_NAME
5035 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5036 return false;
5038 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5040 tree_code rhs_code = gimple_assign_rhs_code (assign);
5041 switch (rhs_code)
5043 CASE_CONVERT:
5044 case SSA_NAME:
5045 case BIT_NOT_EXPR:
5046 case BIT_IOR_EXPR:
5047 case BIT_XOR_EXPR:
5048 case BIT_AND_EXPR:
5049 return true;
5051 default:
5052 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5055 else if (is_a <gphi *> (stmt_info->stmt))
5056 return true;
5057 return false;
5060 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5061 a vector mask type instead of a normal vector type. Record the
5062 result in STMT_INFO->mask_precision. */
5064 static void
5065 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5067 if (!possible_vector_mask_operation_p (stmt_info))
5068 return;
5070 /* If at least one boolean input uses a vector mask type,
5071 pick the mask type with the narrowest elements.
5073 ??? This is the traditional behavior. It should always produce
5074 the smallest number of operations, but isn't necessarily the
5075 optimal choice. For example, if we have:
5077 a = b & c
5079 where:
5081 - the user of a wants it to have a mask type for 16-bit elements (M16)
5082 - b also uses M16
5083 - c uses a mask type for 8-bit elements (M8)
5085 then picking M8 gives:
5087 - 1 M16->M8 pack for b
5088 - 1 M8 AND for a
5089 - 2 M8->M16 unpacks for the user of a
5091 whereas picking M16 would have given:
5093 - 2 M8->M16 unpacks for c
5094 - 2 M16 ANDs for a
5096 The number of operations are equal, but M16 would have given
5097 a shorter dependency chain and allowed more ILP. */
5098 unsigned int precision = ~0U;
5099 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5101 unsigned int nops = gimple_num_ops (assign);
5102 for (unsigned int i = 1; i < nops; ++i)
5104 tree rhs = gimple_op (assign, i);
5105 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5106 continue;
5108 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5109 if (!def_stmt_info)
5110 /* Don't let external or constant operands influence the choice.
5111 We can convert them to whichever vector type we pick. */
5112 continue;
5114 if (def_stmt_info->mask_precision)
5116 if (precision > def_stmt_info->mask_precision)
5117 precision = def_stmt_info->mask_precision;
5121 /* If the statement compares two values that shouldn't use vector masks,
5122 try comparing the values as normal scalars instead. */
5123 tree_code rhs_code = gimple_assign_rhs_code (assign);
5124 if (precision == ~0U
5125 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5127 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5128 scalar_mode mode;
5129 tree vectype, mask_type;
5130 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5131 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5132 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5133 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5134 precision = GET_MODE_BITSIZE (mode);
5137 else
5139 gphi *phi = as_a <gphi *> (stmt_info->stmt);
5140 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5142 tree rhs = gimple_phi_arg_def (phi, i);
5144 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5145 if (!def_stmt_info)
5146 /* Don't let external or constant operands influence the choice.
5147 We can convert them to whichever vector type we pick. */
5148 continue;
5150 if (def_stmt_info->mask_precision)
5152 if (precision > def_stmt_info->mask_precision)
5153 precision = def_stmt_info->mask_precision;
5158 if (dump_enabled_p ())
5160 if (precision == ~0U)
5161 dump_printf_loc (MSG_NOTE, vect_location,
5162 "using normal nonmask vectors for %G",
5163 stmt_info->stmt);
5164 else
5165 dump_printf_loc (MSG_NOTE, vect_location,
5166 "using boolean precision %d for %G",
5167 precision, stmt_info->stmt);
5170 stmt_info->mask_precision = precision;
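/* Illustrative example, added for exposition (operand types assumed):
   for bool b = (i_4 > i_5) with 32-bit int operands and no boolean
   operand carrying a mask precision, the comparison is tested with
   expand_vec_cmp_expr_p and, if supported, precision becomes
   GET_MODE_BITSIZE (SImode) == 32, i.e. the mask type associated with
   32-bit elements rather than with byte-sized booleans.  */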
5173 /* Handle vect_determine_precisions for STMT_INFO, given that we
5174 have already done so for the users of its result. */
5176 void
5177 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5179 vect_determine_min_output_precision (vinfo, stmt_info);
5180 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5182 vect_determine_precisions_from_range (stmt_info, stmt);
5183 vect_determine_precisions_from_users (stmt_info, stmt);
5187 /* Walk backwards through the vectorizable region to determine the
5188 values of these fields:
5190 - min_output_precision
5191 - min_input_precision
5192 - operation_precision
5193 - operation_sign. */
5195 void
5196 vect_determine_precisions (vec_info *vinfo)
5198 DUMP_VECT_SCOPE ("vect_determine_precisions");
5200 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5202 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5203 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5204 unsigned int nbbs = loop->num_nodes;
5206 for (unsigned int i = 0; i < nbbs; i++)
5208 basic_block bb = bbs[i];
5209 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5210 if (!is_gimple_debug (gsi_stmt (si)))
5211 vect_determine_mask_precision
5212 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5214 for (unsigned int i = 0; i < nbbs; i++)
5216 basic_block bb = bbs[nbbs - i - 1];
5217 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5218 !gsi_end_p (si); gsi_prev (&si))
5219 if (!is_gimple_debug (gsi_stmt (si)))
5220 vect_determine_stmt_precisions
5221 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5224 else
5226 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5227 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5229 basic_block bb = bb_vinfo->bbs[i];
5230 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5232 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5233 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5234 vect_determine_mask_precision (vinfo, stmt_info);
5236 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5238 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5239 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5240 vect_determine_mask_precision (vinfo, stmt_info);
5243 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5245 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5246 !gsi_end_p (gsi); gsi_prev (&gsi))
5248 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5249 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5250 vect_determine_stmt_precisions (vinfo, stmt_info);
5252 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5253 !gsi_end_p (gsi); gsi_next (&gsi))
5255 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5256 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5257 vect_determine_stmt_precisions (vinfo, stmt_info);
5263 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5265 struct vect_recog_func
5267 vect_recog_func_ptr fn;
5268 const char *name;
5271 /* Note that ordering matters - the first pattern matching on a stmt is
5272 taken, which means the more complex patterns usually need to precede the
5273 less complex ones (widen_sum only after dot_prod or sad, for example). */
5274 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5275 { vect_recog_over_widening_pattern, "over_widening" },
5276 /* Must come after over_widening, which narrows the shift as much as
5277 possible beforehand. */
5278 { vect_recog_average_pattern, "average" },
5279 { vect_recog_mulhs_pattern, "mult_high" },
5280 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5281 { vect_recog_widen_mult_pattern, "widen_mult" },
5282 { vect_recog_dot_prod_pattern, "dot_prod" },
5283 { vect_recog_sad_pattern, "sad" },
5284 { vect_recog_widen_sum_pattern, "widen_sum" },
5285 { vect_recog_pow_pattern, "pow" },
5286 { vect_recog_widen_shift_pattern, "widen_shift" },
5287 { vect_recog_rotate_pattern, "rotate" },
5288 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5289 { vect_recog_divmod_pattern, "divmod" },
5290 { vect_recog_mult_pattern, "mult" },
5291 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5292 { vect_recog_bool_pattern, "bool" },
5293 /* This must come before mask conversion, and includes the parts
5294 of mask conversion that are needed for gather and scatter
5295 internal functions. */
5296 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5297 { vect_recog_mask_conversion_pattern, "mask_conversion" },
5298 { vect_recog_widen_plus_pattern, "widen_plus" },
5299 { vect_recog_widen_minus_pattern, "widen_minus" },
5302 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
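/* Illustrative sketch only (hypothetical names, not part of the table
   above): a new recognizer would follow the vect_recog_func_ptr
   signature and return NULL when the statement does not match:

     static gimple *
     vect_recog_example_pattern (vec_info *, stmt_vec_info, tree *)
     {
       return NULL;  /* No match; the caller tries the next pattern.  */
     }

   It would then be registered by adding

     { vect_recog_example_pattern, "example" },

   to the table, at the position dictated by the ordering comment.  */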
5304 /* Mark statements that are involved in a pattern. */
5306 void
5307 vect_mark_pattern_stmts (vec_info *vinfo,
5308 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5309 tree pattern_vectype)
5311 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5312 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5314 gimple *orig_pattern_stmt = NULL;
5315 if (is_pattern_stmt_p (orig_stmt_info))
5317 /* We're replacing a statement in an existing pattern definition
5318 sequence. */
5319 orig_pattern_stmt = orig_stmt_info->stmt;
5320 if (dump_enabled_p ())
5321 dump_printf_loc (MSG_NOTE, vect_location,
5322 "replacing earlier pattern %G", orig_pattern_stmt);
5324 /* To keep the book-keeping simple, just swap the lhs of the
5325 old and new statements, so that the old one has a valid but
5326 unused lhs. */
5327 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5328 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5329 gimple_set_lhs (pattern_stmt, old_lhs);
5331 if (dump_enabled_p ())
5332 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5334 /* Switch to the statement that ORIG replaces. */
5335 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5337 /* We shouldn't be replacing the main pattern statement. */
5338 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5339 != orig_pattern_stmt);
5342 if (def_seq)
5343 for (gimple_stmt_iterator si = gsi_start (def_seq);
5344 !gsi_end_p (si); gsi_next (&si))
5346 if (dump_enabled_p ())
5347 dump_printf_loc (MSG_NOTE, vect_location,
5348 "extra pattern stmt: %G", gsi_stmt (si));
5349 stmt_vec_info pattern_stmt_info
5350 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5351 orig_stmt_info, pattern_vectype);
5352 /* Stmts in the def sequence are not vectorizable cycle or
5353 induction defs; instead they should all be vect_internal_def,
5354 feeding the main pattern stmt, which retains this def type. */
5355 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5358 if (orig_pattern_stmt)
5360 vect_init_pattern_stmt (vinfo, pattern_stmt,
5361 orig_stmt_info, pattern_vectype);
5363 /* Insert all the new pattern statements before the original one. */
5364 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5365 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5366 orig_def_seq);
5367 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5368 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5370 /* Remove the pattern statement that this new pattern replaces. */
5371 gsi_remove (&gsi, false);
5373 else
5374 vect_set_pattern_stmt (vinfo,
5375 pattern_stmt, orig_stmt_info, pattern_vectype);
5377 /* Transfer reduction path info to the pattern. */
5378 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5380 tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
5381 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
5382 /* Search the pattern def sequence and the main pattern stmt. Note
5383 we may have inserted everything into a containing pattern def sequence,
5384 so the following is a bit awkward. */
5385 gimple_stmt_iterator si;
5386 gimple *s;
5387 if (def_seq)
5389 si = gsi_start (def_seq);
5390 s = gsi_stmt (si);
5391 gsi_next (&si);
5393 else
5395 si = gsi_none ();
5396 s = pattern_stmt;
5400 bool found = false;
5401 for (unsigned i = 1; i < gimple_num_ops (s); ++i)
5402 if (gimple_op (s, i) == lookfor)
5404 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
5405 lookfor = gimple_get_lhs (s);
5406 found = true;
5407 break;
5409 if (s == pattern_stmt)
5411 if (!found && dump_enabled_p ())
5412 dump_printf_loc (MSG_NOTE, vect_location,
5413 "failed to update reduction index.\n");
5414 break;
5416 if (gsi_end_p (si))
5417 s = pattern_stmt;
5418 else
5420 s = gsi_stmt (si);
5421 if (s == pattern_stmt)
5422 /* Found the end inside a bigger pattern def seq. */
5423 si = gsi_none ();
5424 else
5425 gsi_next (&si);
5427 } while (1);
5431 /* Function vect_pattern_recog_1
5433 Input:
5434 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5435 computation pattern.
5436 STMT_INFO: A stmt from which the pattern search should start.
5438 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5439 a sequence of statements that has the same functionality and can be
5440 used to replace STMT_INFO. It returns the last statement in the sequence
5441 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5442 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5443 statement, having first checked that the target supports the new operation
5444 in that type.
5446 This function also does some bookkeeping, as explained in the documentation
5447 for vect_pattern_recog. */
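/* A hedged sketch of the success path described above (the helper names
   and operands are assumptions):

     last_stmt = gimple_build_assign (new_lhs, NEW_CODE, op0, op1);
     *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (new_lhs));
     return last_stmt;

   with any earlier helper statements appended to the pattern def
   sequence of STMT_INFO before returning; returning NULL instead means
   no pattern was recognized.  */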
5449 static void
5450 vect_pattern_recog_1 (vec_info *vinfo,
5451 vect_recog_func *recog_func, stmt_vec_info stmt_info)
5453 gimple *pattern_stmt;
5454 loop_vec_info loop_vinfo;
5455 tree pattern_vectype;
5457 /* If this statement has already been replaced with pattern statements,
5458 leave the original statement alone, since the first match wins.
5459 Instead try to match against the definition statements that feed
5460 the main pattern statement. */
5461 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5463 gimple_stmt_iterator gsi;
5464 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5465 !gsi_end_p (gsi); gsi_next (&gsi))
5466 vect_pattern_recog_1 (vinfo, recog_func,
5467 vinfo->lookup_stmt (gsi_stmt (gsi)));
5468 return;
5471 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5472 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5473 if (!pattern_stmt)
5475 /* Clear any half-formed pattern definition sequence. */
5476 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5477 return;
5480 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5481 gcc_assert (pattern_vectype);
5483 /* Found a vectorizable pattern. */
5484 if (dump_enabled_p ())
5485 dump_printf_loc (MSG_NOTE, vect_location,
5486 "%s pattern recognized: %G",
5487 recog_func->name, pattern_stmt);
5489 /* Mark the stmts that are involved in the pattern. */
5490 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5492 /* Patterns cannot be vectorized using SLP, because they change the order of
5493 computation. */
5494 if (loop_vinfo)
5496 unsigned ix, ix2;
5497 stmt_vec_info *elem_ptr;
5498 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5499 elem_ptr, *elem_ptr == stmt_info);
5504 /* Function vect_pattern_recog
5506 Input:
5507 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
5508 computation idioms.
5510 Output - for each computation idiom that is detected we create a new stmt
5511 that provides the same functionality and that can be vectorized. We
5512 also record some information in the struct_stmt_info of the relevant
5513 stmts, as explained below:
5515 At the entry to this function we have the following stmts, with the
5516 following initial value in the STMT_VINFO fields:
5518 stmt in_pattern_p related_stmt vec_stmt
5519 S1: a_i = .... - - -
5520 S2: a_2 = ..use(a_i).. - - -
5521 S3: a_1 = ..use(a_2).. - - -
5522 S4: a_0 = ..use(a_1).. - - -
5523 S5: ... = ..use(a_0).. - - -
5525 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5526 represented by a single stmt. We then:
5527 - create a new stmt S6 equivalent to the pattern (the stmt is not
5528 inserted into the code)
5529 - fill in the STMT_VINFO fields as follows:
5531 in_pattern_p related_stmt vec_stmt
5532 S1: a_i = .... - - -
5533 S2: a_2 = ..use(a_i).. - - -
5534 S3: a_1 = ..use(a_2).. - - -
5535 S4: a_0 = ..use(a_1).. true S6 -
5536 '---> S6: a_new = .... - S4 -
5537 S5: ... = ..use(a_0).. - - -
5539 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5540 to each other through the RELATED_STMT field).
5542 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5543 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5544 remain irrelevant unless used by stmts other than S4.
5546 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5547 (because they are marked as irrelevant). It will vectorize S6, and record
5548 a pointer to the new vector stmt VS6 from S6 (as usual).
5549 S4 will be skipped, and S5 will be vectorized as usual:
5551 in_pattern_p related_stmt vec_stmt
5552 S1: a_i = .... - - -
5553 S2: a_2 = ..use(a_i).. - - -
5554 S3: a_1 = ..use(a_2).. - - -
5555 > VS6: va_new = .... - - -
5556 S4: a_0 = ..use(a_1).. true S6 VS6
5557 '---> S6: a_new = .... - S4 VS6
5558 > VS5: ... = ..vuse(va_new).. - - -
5559 S5: ... = ..use(a_0).. - - -
5561 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5562 elsewhere), and we'll end up with:
5564 VS6: va_new = ....
5565 VS5: ... = ..vuse(va_new)..
5567 In case of more than one pattern statements, e.g., widen-mult with
5568 intermediate type:
5570 S1 a_t = ;
5571 S2 a_T = (TYPE) a_t;
5572 '--> S3: a_it = (interm_type) a_t;
5573 S4 prod_T = a_T * CONST;
5574 '--> S5: prod_T' = a_it w* CONST;
5576 there may be other users of a_T outside the pattern. In that case S2 will
5577 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5578 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5579 be recorded in S3. */
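/* A source-level instance of the widen-mult case above (assumed, not
   taken from this file): with

     char a_t = ...;
     int  a_T = (int) a_t;
     int  prod_T = a_T * 7;

   a 16-bit intermediate a_it = (short) a_t can be introduced and the
   product computed as prod_T' = a_it w* 7, giving pattern statements
   for both the conversion (S3) and the multiplication (S5).  */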
5581 void
5582 vect_pattern_recog (vec_info *vinfo)
5584 class loop *loop;
5585 basic_block *bbs;
5586 unsigned int nbbs;
5587 gimple_stmt_iterator si;
5588 unsigned int i, j;
5590 vect_determine_precisions (vinfo);
5592 DUMP_VECT_SCOPE ("vect_pattern_recog");
5594 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5596 loop = LOOP_VINFO_LOOP (loop_vinfo);
5597 bbs = LOOP_VINFO_BBS (loop_vinfo);
5598 nbbs = loop->num_nodes;
5600 /* Scan through the loop stmts, applying the pattern recognition
5601 functions starting at each stmt visited: */
5602 for (i = 0; i < nbbs; i++)
5604 basic_block bb = bbs[i];
5605 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5607 if (is_gimple_debug (gsi_stmt (si)))
5608 continue;
5609 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5610 /* Scan over all generic vect_recog_xxx_pattern functions. */
5611 for (j = 0; j < NUM_PATTERNS; j++)
5612 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5613 stmt_info);
5617 else
5619 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5620 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5621 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
5622 !gsi_end_p (gsi); gsi_next (&gsi))
5624 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
5625 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5626 continue;
5628 /* Scan over all generic vect_recog_xxx_pattern functions. */
5629 for (j = 0; j < NUM_PATTERNS; j++)
5630 vect_pattern_recog_1 (vinfo,
5631 &vect_vect_recog_func_ptrs[j], stmt_info);
5635 /* After this no more add_stmt calls are allowed. */
5636 vinfo->stmt_vec_info_ro = true;