gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51 #include "gimple-range.h"
53 /* Return true if we have a useful VR_RANGE range for VAR, storing it
54 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
56 static bool
57 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
59 value_range vr;
60 get_range_query (cfun)->range_of_expr (vr, var);
61 if (vr.undefined_p ())
62 vr.set_varying (TREE_TYPE (var));
63 *min_value = wi::to_wide (vr.min ());
64 *max_value = wi::to_wide (vr.max ());
65 value_range_kind vr_type = vr.kind ();
66 wide_int nonzero = get_nonzero_bits (var);
67 signop sgn = TYPE_SIGN (TREE_TYPE (var));
68 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
69 nonzero, sgn) == VR_RANGE)
71 if (dump_enabled_p ())
73 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
74 dump_printf (MSG_NOTE, " has range [");
75 dump_hex (MSG_NOTE, *min_value);
76 dump_printf (MSG_NOTE, ", ");
77 dump_hex (MSG_NOTE, *max_value);
78 dump_printf (MSG_NOTE, "]\n");
80 return true;
82 else
84 if (dump_enabled_p ())
86 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
87 dump_printf (MSG_NOTE, " has no range info\n");
89 return false;
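/* For example, if earlier range analysis has established that an SSA
   name x_5 of unsigned type lies in [0, 100], the note emitted above
   (when dumping is enabled) would read:
     x_5 has range [0x0, 0x64]  */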
93 /* Report that we've found an instance of the pattern NAME in
94 statement STMT. */
96 static void
97 vect_pattern_detected (const char *name, gimple *stmt)
99 if (dump_enabled_p ())
100 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
103 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
104 return the pattern statement's stmt_vec_info. Set its vector type to
105 VECTYPE if it doesn't have one already. */
107 static stmt_vec_info
108 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
109 stmt_vec_info orig_stmt_info, tree vectype)
111 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
112 if (pattern_stmt_info == NULL)
113 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
114 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
116 pattern_stmt_info->pattern_stmt_p = true;
117 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
118 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
119 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
120 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
122 gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
123 == vect_use_mask_type_p (orig_stmt_info));
124 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
125 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
127 return pattern_stmt_info;
130 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
131 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
132 have one already. */
134 static void
135 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
136 stmt_vec_info orig_stmt_info, tree vectype)
138 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
139 STMT_VINFO_RELATED_STMT (orig_stmt_info)
140 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
143 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
144 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
145 be different from the vector type of the final pattern statement.
146 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
147 from which it was derived. */
149 static inline void
150 append_pattern_def_seq (vec_info *vinfo,
151 stmt_vec_info stmt_info, gimple *new_stmt,
152 tree vectype = NULL_TREE,
153 tree scalar_type_for_mask = NULL_TREE)
155 gcc_assert (!scalar_type_for_mask
156 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
157 if (vectype)
159 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
160 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
161 if (scalar_type_for_mask)
162 new_stmt_info->mask_precision
163 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
165 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
166 new_stmt);
169 /* The caller wants to perform new operations on vect_external variable
170 VAR, so that the result of the operations would also be vect_external.
171 Return the edge on which the operations can be performed, if one exists.
172 Return null if the operations should instead be treated as part of
173 the pattern that needs them. */
175 static edge
176 vect_get_external_def_edge (vec_info *vinfo, tree var)
178 edge e = NULL;
179 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
181 e = loop_preheader_edge (loop_vinfo->loop);
182 if (!SSA_NAME_IS_DEFAULT_DEF (var))
184 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
185 if (bb == NULL
186 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
187 e = NULL;
190 return e;
193 /* Return true if the target supports a vector version of CODE,
194 where CODE is known to map to a direct optab with the given SUBTYPE.
195 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
196 specifies the type of the scalar result.
198 If CODE allows the inputs and outputs to have different type
199 (such as for WIDEN_SUM_EXPR), it is the input mode rather
200 than the output mode that determines the appropriate target pattern.
201 Operand 0 of the target pattern then specifies the mode that the output
202 must have.
204 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
205 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
206 is nonnull. */
208 static bool
209 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
210 tree itype, tree *vecotype_out,
211 tree *vecitype_out = NULL,
212 enum optab_subtype subtype = optab_default)
214 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
215 if (!vecitype)
216 return false;
218 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
219 if (!vecotype)
220 return false;
222 optab optab = optab_for_tree_code (code, vecitype, subtype);
223 if (!optab)
224 return false;
226 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
227 if (icode == CODE_FOR_nothing
228 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
229 return false;
231 *vecotype_out = vecotype;
232 if (vecitype_out)
233 *vecitype_out = vecitype;
234 return true;
237 /* Round bit precision PRECISION up to a full element. */
239 static unsigned int
240 vect_element_precision (unsigned int precision)
242 precision = 1 << ceil_log2 (precision);
243 return MAX (precision, BITS_PER_UNIT);
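/* For example, assuming BITS_PER_UNIT is 8, a required precision of 3
   is rounded up to 8, 12 is rounded up to 16, and 17 is rounded up
   to 32.  */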
246 /* If OP is defined by a statement that's being considered for vectorization,
247 return information about that statement, otherwise return NULL. */
249 static stmt_vec_info
250 vect_get_internal_def (vec_info *vinfo, tree op)
252 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
253 if (def_stmt_info
254 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
255 return def_stmt_info;
256 return NULL;
259 /* Check whether NAME, an ssa-name used in STMT_VINFO,
260 is a result of a type promotion, such that:
261 DEF_STMT: NAME = NOP (name0)
262 If CHECK_SIGN is TRUE, check that either both types are signed or both are
263 unsigned. */
265 static bool
266 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
267 tree *orig_type, gimple **def_stmt, bool *promotion)
269 tree type = TREE_TYPE (name);
270 tree oprnd0;
271 enum vect_def_type dt;
273 stmt_vec_info def_stmt_info;
274 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
275 return false;
277 if (dt != vect_internal_def
278 && dt != vect_external_def && dt != vect_constant_def)
279 return false;
281 if (!*def_stmt)
282 return false;
284 if (!is_gimple_assign (*def_stmt))
285 return false;
287 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
288 return false;
290 oprnd0 = gimple_assign_rhs1 (*def_stmt);
292 *orig_type = TREE_TYPE (oprnd0);
293 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
294 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
295 return false;
297 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
298 *promotion = true;
299 else
300 *promotion = false;
302 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
303 return false;
305 return true;
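/* For example, if NAME is i_2 in:
     short s_1 = ...;
     int i_2 = (int) s_1;
   then *ORIG_TYPE is set to short and *PROMOTION to true, since int
   has at least twice the precision of short.  For a same-width sign
   change *PROMOTION would be false, and the check would fail when
   CHECK_SIGN is true.  */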
308 /* Holds information about an input operand after some sign changes
309 and type promotions have been peeled away. */
310 class vect_unpromoted_value {
311 public:
312 vect_unpromoted_value ();
314 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
316 /* The value obtained after peeling away zero or more casts. */
317 tree op;
319 /* The type of OP. */
320 tree type;
322 /* The definition type of OP. */
323 vect_def_type dt;
325 /* If OP is the result of peeling at least one cast, and if the cast
326 of OP itself is a vectorizable statement, CASTER identifies that
327 statement, otherwise it is null. */
328 stmt_vec_info caster;
331 inline vect_unpromoted_value::vect_unpromoted_value ()
332 : op (NULL_TREE),
333 type (NULL_TREE),
334 dt (vect_uninitialized_def),
335 caster (NULL)
339 /* Set the operand to OP_IN, its definition type to DT_IN, and the
340 statement that casts it to CASTER_IN. */
342 inline void
343 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
344 stmt_vec_info caster_in)
346 op = op_in;
347 type = TREE_TYPE (op);
348 dt = dt_in;
349 caster = caster_in;
352 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
353 to reach some vectorizable inner operand OP', continuing as long as it
354 is possible to convert OP' back to OP using a possible sign change
355 followed by a possible promotion P. Return this OP', or null if OP is
356 not a vectorizable SSA name. If there is a promotion P, describe its
357 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
358 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
359 have more than one user.
361 A successful return means that it is possible to go from OP' to OP
362 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
363 whereas the cast from UNPROM to OP might be a promotion, a sign
364 change, or a nop.
366 E.g. say we have:
368 signed short *ptr = ...;
369 signed short C = *ptr;
370 unsigned short B = (unsigned short) C; // sign change
371 signed int A = (signed int) B; // unsigned promotion
372 ...possible other uses of A...
373 unsigned int OP = (unsigned int) A; // sign change
375 In this case it's possible to go directly from C to OP using:
377 OP = (unsigned int) (unsigned short) C;
378 +------------+ +--------------+
379 promotion sign change
381 so OP' would be C. The input to the promotion is B, so UNPROM
382 would describe B. */
384 static tree
385 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
386 vect_unpromoted_value *unprom,
387 bool *single_use_p = NULL)
389 tree res = NULL_TREE;
390 tree op_type = TREE_TYPE (op);
391 unsigned int orig_precision = TYPE_PRECISION (op_type);
392 unsigned int min_precision = orig_precision;
393 stmt_vec_info caster = NULL;
394 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
396 /* See whether OP is simple enough to vectorize. */
397 stmt_vec_info def_stmt_info;
398 gimple *def_stmt;
399 vect_def_type dt;
400 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
401 break;
403 /* If OP is the input of a demotion, skip over it to see whether
404 OP is itself the result of a promotion. If so, the combined
405 effect of the promotion and the demotion might fit the required
406 pattern, otherwise neither operation fits.
408 This copes with cases such as the result of an arithmetic
409 operation being truncated before being stored, and where that
410 arithmetic operation has been recognized as an over-widened one. */
411 if (TYPE_PRECISION (op_type) <= min_precision)
413 /* Use OP as the UNPROM described above if we haven't yet
414 found a promotion, or if using the new input preserves the
415 sign of the previous promotion. */
416 if (!res
417 || TYPE_PRECISION (unprom->type) == orig_precision
418 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
420 unprom->set_op (op, dt, caster);
421 min_precision = TYPE_PRECISION (op_type);
423 /* Stop if we've already seen a promotion and if this
424 conversion does more than change the sign. */
425 else if (TYPE_PRECISION (op_type)
426 != TYPE_PRECISION (unprom->type))
427 break;
429 /* The sequence now extends to OP. */
430 res = op;
433 /* See whether OP is defined by a cast. Record it as CASTER if
434 the cast is potentially vectorizable. */
435 if (!def_stmt)
436 break;
437 caster = def_stmt_info;
439 /* Ignore pattern statements, since we don't link uses for them. */
440 if (caster
441 && single_use_p
442 && !STMT_VINFO_RELATED_STMT (caster)
443 && !has_single_use (res))
444 *single_use_p = false;
446 gassign *assign = dyn_cast <gassign *> (def_stmt);
447 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
448 break;
450 /* Continue with the input to the cast. */
451 op = gimple_assign_rhs1 (def_stmt);
452 op_type = TREE_TYPE (op);
454 return res;
457 /* OP is an integer operand to an operation that returns TYPE, and we
458 want to treat the operation as a widening one. So far we can treat
459 it as widening from *COMMON_TYPE.
461 Return true if OP is suitable for such a widening operation,
462 either widening from *COMMON_TYPE or from some supertype of it.
463 Update *COMMON_TYPE to the supertype in the latter case.
465 SHIFT_P is true if OP is a shift amount. */
467 static bool
468 vect_joust_widened_integer (tree type, bool shift_p, tree op,
469 tree *common_type)
471 /* Calculate the minimum precision required by OP, without changing
472 the sign of either operand. */
473 unsigned int precision;
474 if (shift_p)
476 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
477 return false;
478 precision = TREE_INT_CST_LOW (op);
480 else
482 precision = wi::min_precision (wi::to_widest (op),
483 TYPE_SIGN (*common_type));
484 if (precision * 2 > TYPE_PRECISION (type))
485 return false;
488 /* If OP requires a wider type, switch to that type. The checks
489 above ensure that this is still narrower than the result. */
490 precision = vect_element_precision (precision);
491 if (TYPE_PRECISION (*common_type) < precision)
492 *common_type = build_nonstandard_integer_type
493 (precision, TYPE_UNSIGNED (*common_type));
494 return true;
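/* For example, with a 32-bit TYPE, SHIFT_P false and a signed 8-bit
   *COMMON_TYPE, a constant OP of 1000 needs 11 bits, which rounds up
   to a 16-bit element, so *COMMON_TYPE is widened to a signed 16-bit
   type and we return true.  A constant needing more than 16 bits
   would leave no room for widening and we would return false.  */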
497 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
498 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
500 static bool
501 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
503 if (types_compatible_p (*common_type, new_type))
504 return true;
506 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
507 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
508 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
509 return true;
511 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
512 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
513 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
515 *common_type = new_type;
516 return true;
519 /* We have mismatched signs, with the signed type being
520 no wider than the unsigned type. In this case we need
521 a wider signed type. */
522 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
523 TYPE_PRECISION (new_type));
524 precision *= 2;
526 if (precision * 2 > TYPE_PRECISION (type))
527 return false;
529 *common_type = build_nonstandard_integer_type (precision, false);
530 return true;
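/* For example, if *COMMON_TYPE is unsigned short and NEW_TYPE is
   signed short, neither type can hold all values of the other, so a
   signed 32-bit supertype is needed.  With a 64-bit TYPE,
   *COMMON_TYPE becomes that signed 32-bit type and we return true;
   with a 32-bit TYPE the supertype would no longer be narrower than
   TYPE, so we return false.  */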
533 /* Check whether STMT_INFO can be viewed as a tree of integer operations
534 in which each node either performs CODE or WIDENED_CODE, and where
535 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
536 specifies the maximum number of leaf operands. SHIFT_P says whether
537 CODE and WIDENED_CODE are some sort of shift.
539 If STMT_INFO is such a tree, return the number of leaf operands
540 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
541 to a type that (a) is narrower than the result of STMT_INFO and
542 (b) can hold all leaf operand values.
544 If SUBTYPE is nonnull, the signs of the operands are allowed to
545 differ, but their precisions must match.  SUBTYPE is updated to
546 reflect this.
548 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
549 exists. */
551 static unsigned int
552 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
553 tree_code widened_code, bool shift_p,
554 unsigned int max_nops,
555 vect_unpromoted_value *unprom, tree *common_type,
556 enum optab_subtype *subtype = NULL)
558 /* Check for an integer operation with the right code. */
559 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
560 if (!assign)
561 return 0;
563 tree_code rhs_code = gimple_assign_rhs_code (assign);
564 if (rhs_code != code && rhs_code != widened_code)
565 return 0;
567 tree type = TREE_TYPE (gimple_assign_lhs (assign));
568 if (!INTEGRAL_TYPE_P (type))
569 return 0;
571 /* Assume that both operands will be leaf operands. */
572 max_nops -= 2;
574 /* Check the operands. */
575 unsigned int next_op = 0;
576 for (unsigned int i = 0; i < 2; ++i)
578 vect_unpromoted_value *this_unprom = &unprom[next_op];
579 unsigned int nops = 1;
580 tree op = gimple_op (assign, i + 1);
581 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
583 /* We already have a common type from earlier operands.
584 Update it to account for OP. */
585 this_unprom->set_op (op, vect_constant_def);
586 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
587 return 0;
589 else
591 /* Only allow shifts by constants. */
592 if (shift_p && i == 1)
593 return 0;
595 if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
596 return 0;
598 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
600 /* The operand isn't widened. If STMT_INFO has the code
601 for an unwidened operation, recursively check whether
602 this operand is a node of the tree. */
603 if (rhs_code != code
604 || max_nops == 0
605 || this_unprom->dt != vect_internal_def)
606 return 0;
608 /* Give back the leaf slot allocated above now that we're
609 not treating this as a leaf operand. */
610 max_nops += 1;
612 /* Recursively process the definition of the operand. */
613 stmt_vec_info def_stmt_info
614 = vinfo->lookup_def (this_unprom->op);
615 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
616 widened_code, shift_p, max_nops,
617 this_unprom, common_type,
618 subtype);
619 if (nops == 0)
620 return 0;
622 max_nops -= nops;
624 else
626 /* Make sure that the operand is narrower than the result. */
627 if (TYPE_PRECISION (this_unprom->type) * 2
628 > TYPE_PRECISION (type))
629 return 0;
631 /* Update COMMON_TYPE for the new operand. */
632 if (i == 0)
633 *common_type = this_unprom->type;
634 else if (!vect_joust_widened_type (type, this_unprom->type,
635 common_type))
637 if (subtype)
639 /* See if we can sign extend the smaller type. */
640 if (TYPE_PRECISION (this_unprom->type)
641 > TYPE_PRECISION (*common_type))
642 *common_type = this_unprom->type;
643 *subtype = optab_vector_mixed_sign;
645 else
646 return 0;
650 next_op += nops;
652 return next_op;
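/* For example, calling this function with CODE == MULT_EXPR,
   WIDENED_CODE == WIDEN_MULT_EXPR and MAX_NOPS == 2 on:
     short a_1, b_2;
     int prod_3 = (int) a_1 * (int) b_2;
   returns 2, describes a_1 and b_2 in UNPROM[0] and UNPROM[1], and
   sets *COMMON_TYPE to short.  */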
655 /* Helper to return a new temporary of type TYPE for a pattern statement STMT.  If STMT
656 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
658 static tree
659 vect_recog_temp_ssa_var (tree type, gimple *stmt)
661 return make_temp_ssa_name (type, stmt, "patt");
664 /* STMT2_INFO describes a type conversion that could be split into STMT1
665 followed by a version of STMT2_INFO that takes NEW_RHS as its first
666 input. Try to do this using pattern statements, returning true on
667 success. */
669 static bool
670 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
671 gimple *stmt1, tree vectype)
673 if (is_pattern_stmt_p (stmt2_info))
675 /* STMT2_INFO is part of a pattern. Get the statement to which
676 the pattern is attached. */
677 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
678 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location,
682 "Splitting pattern statement: %G", stmt2_info->stmt);
684 /* Since STMT2_INFO is a pattern statement, we can change it
685 in-situ without worrying about changing the code for the
686 containing block. */
687 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
689 if (dump_enabled_p ())
691 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
692 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
693 stmt2_info->stmt);
696 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
697 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
698 /* STMT2_INFO is the actual pattern statement. Add STMT1
699 to the end of the definition sequence. */
700 gimple_seq_add_stmt_without_update (def_seq, stmt1);
701 else
703 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
704 before it. */
705 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
706 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
708 return true;
710 else
712 /* STMT2_INFO doesn't yet have a pattern. Try to create a
713 two-statement pattern now. */
714 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
715 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
716 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
717 if (!lhs_vectype)
718 return false;
720 if (dump_enabled_p ())
721 dump_printf_loc (MSG_NOTE, vect_location,
722 "Splitting statement: %G", stmt2_info->stmt);
724 /* Add STMT1 as a singleton pattern definition sequence. */
725 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
726 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
727 gimple_seq_add_stmt_without_update (def_seq, stmt1);
729 /* Build the second of the two pattern statements. */
730 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
731 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
732 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
734 if (dump_enabled_p ())
736 dump_printf_loc (MSG_NOTE, vect_location,
737 "into pattern statements: %G", stmt1);
738 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
741 return true;
745 /* Convert UNPROM to TYPE and return the result, adding new statements
746 to STMT_INFO's pattern definition statements if no better way is
747 available. VECTYPE is the vector form of TYPE.
749 If SUBTYPE then convert the type based on the subtype. */
751 static tree
752 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
753 vect_unpromoted_value *unprom, tree vectype,
754 enum optab_subtype subtype = optab_default)
757 /* Update the type if the signs differ. */
758 if (subtype == optab_vector_mixed_sign
759 && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
760 type = build_nonstandard_integer_type (TYPE_PRECISION (type),
761 TYPE_SIGN (unprom->type));
763 /* Check for a no-op conversion. */
764 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
765 return unprom->op;
767 /* Allow the caller to create constant vect_unpromoted_values. */
768 if (TREE_CODE (unprom->op) == INTEGER_CST)
769 return wide_int_to_tree (type, wi::to_widest (unprom->op));
771 tree input = unprom->op;
772 if (unprom->caster)
774 tree lhs = gimple_get_lhs (unprom->caster->stmt);
775 tree lhs_type = TREE_TYPE (lhs);
777 /* If the result of the existing cast is the right width, use it
778 instead of the source of the cast. */
779 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
780 input = lhs;
781 /* If the precision we want is between the source and result
782 precisions of the existing cast, try splitting the cast into
783 two and tapping into a mid-way point. */
784 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
785 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
787 /* In order to preserve the semantics of the original cast,
788 give the mid-way point the same signedness as the input value.
790 It would be possible to use a signed type here instead if
791 TYPE is signed and UNPROM->TYPE is unsigned, but that would
792 make the sign of the midtype sensitive to the order in
793 which we process the statements, since the signedness of
794 TYPE is the signedness required by just one of possibly
795 many users. Also, unsigned promotions are usually as cheap
796 as or cheaper than signed ones, so it's better to keep an
797 unsigned promotion. */
798 tree midtype = build_nonstandard_integer_type
799 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
800 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
801 if (vec_midtype)
803 input = vect_recog_temp_ssa_var (midtype, NULL);
804 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
805 unprom->op);
806 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
807 vec_midtype))
808 append_pattern_def_seq (vinfo, stmt_info,
809 new_stmt, vec_midtype);
813 /* See if we can reuse an existing result. */
814 if (types_compatible_p (type, TREE_TYPE (input)))
815 return input;
818 /* We need a new conversion statement. */
819 tree new_op = vect_recog_temp_ssa_var (type, NULL);
820 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
822 /* If OP is an external value, see if we can insert the new statement
823 on an incoming edge. */
824 if (input == unprom->op && unprom->dt == vect_external_def)
825 if (edge e = vect_get_external_def_edge (vinfo, input))
827 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
828 gcc_assert (!new_bb);
829 return new_op;
832 /* As a (common) last resort, add the statement to the pattern itself. */
833 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
834 return new_op;
837 /* Invoke vect_convert_input for N elements of UNPROM and store the
838 result in the corresponding elements of RESULT.
840 If SUBTYPE then convert the type based on the subtype. */
842 static void
843 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
844 tree *result, tree type, vect_unpromoted_value *unprom,
845 tree vectype, enum optab_subtype subtype = optab_default)
847 for (unsigned int i = 0; i < n; ++i)
849 unsigned int j;
850 for (j = 0; j < i; ++j)
851 if (unprom[j].op == unprom[i].op)
852 break;
854 if (j < i)
855 result[i] = result[j];
856 else
857 result[i] = vect_convert_input (vinfo, stmt_info,
858 type, &unprom[i], vectype, subtype);
862 /* The caller has created a (possibly empty) sequence of pattern definition
863 statements followed by a single statement PATTERN_STMT. Cast the result
864 of this final statement to TYPE. If a new statement is needed, add
865 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
866 and return the new statement, otherwise return PATTERN_STMT as-is.
867 VECITYPE is the vector form of PATTERN_STMT's result type. */
869 static gimple *
870 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
871 gimple *pattern_stmt, tree vecitype)
873 tree lhs = gimple_get_lhs (pattern_stmt);
874 if (!types_compatible_p (type, TREE_TYPE (lhs)))
876 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
877 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
878 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
880 return pattern_stmt;
883 /* Return true if STMT_VINFO describes a reduction for which reassociation
884 is allowed. If STMT_INFO is part of a group, assume that it's part of
885 a reduction chain and optimistically assume that all statements
886 except the last allow reassociation.
887 Also require it to have code CODE and to be a reduction
888 in the outermost loop. When returning true, store the operands in
889 *OP0_OUT and *OP1_OUT. */
891 static bool
892 vect_reassociating_reduction_p (vec_info *vinfo,
893 stmt_vec_info stmt_info, tree_code code,
894 tree *op0_out, tree *op1_out)
896 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
897 if (!loop_info)
898 return false;
900 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
901 if (!assign || gimple_assign_rhs_code (assign) != code)
902 return false;
904 /* We don't allow changing the order of the computation in the inner-loop
905 when doing outer-loop vectorization. */
906 class loop *loop = LOOP_VINFO_LOOP (loop_info);
907 if (loop && nested_in_vect_loop_p (loop, stmt_info))
908 return false;
910 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
912 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
913 code))
914 return false;
916 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
917 return false;
919 *op0_out = gimple_assign_rhs1 (assign);
920 *op1_out = gimple_assign_rhs2 (assign);
921 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
922 std::swap (*op0_out, *op1_out);
923 return true;
926 /* Function vect_recog_dot_prod_pattern
928 Try to find the following pattern:
930 type1a x_t;
931 type1b y_t;
932 TYPE1 prod;
933 TYPE2 sum = init;
934 loop:
935 sum_0 = phi <init, sum_1>
936 S1 x_t = ...
937 S2 y_t = ...
938 S3 x_T = (TYPE1) x_t;
939 S4 y_T = (TYPE1) y_t;
940 S5 prod = x_T * y_T;
941 [S6 prod = (TYPE2) prod; #optional]
942 S7 sum_1 = prod + sum_0;
944 where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
945 the sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b', while
946 the signs of 'type1a' and 'type1b' themselves can differ.
948 Input:
950 * STMT_VINFO: The stmt from which the pattern search begins. In the
951 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
952 will be detected.
954 Output:
956 * TYPE_OUT: The type of the output of this pattern.
958 * Return value: A new stmt that will be used to replace the sequence of
959 stmts that constitute the pattern. In this case it will be:
960 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
962 Note: The dot-prod idiom is a widening reduction pattern that is
963 vectorized without preserving all the intermediate results. It
964 produces only N/2 (widened) results (by summing up pairs of
965 intermediate results) rather than all N results. Therefore, we
966 cannot allow this pattern when we want to get all the results and in
967 the correct order (as is the case when this computation is in an
968 inner-loop nested in an outer-loop that is being vectorized). */
970 static gimple *
971 vect_recog_dot_prod_pattern (vec_info *vinfo,
972 stmt_vec_info stmt_vinfo, tree *type_out)
974 tree oprnd0, oprnd1;
975 gimple *last_stmt = stmt_vinfo->stmt;
976 tree type, half_type;
977 gimple *pattern_stmt;
978 tree var;
980 /* Look for the following pattern
981 DX = (TYPE1) X;
982 DY = (TYPE1) Y;
983 DPROD = DX * DY;
984 DDPROD = (TYPE2) DPROD;
985 sum_1 = DDPROD + sum_0;
986 In which
987 - DX is double the size of X
988 - DY is double the size of Y
989 - DX, DY, DPROD all have the same type but the sign
990 between X, Y and DPROD can differ.
991 - sum is the same size as DPROD or bigger
992 - sum has been recognized as a reduction variable.
994 This is equivalent to:
995 DPROD = X w* Y; #widen mult
996 sum_1 = DPROD w+ sum_0; #widen summation
998 DPROD = X w* Y; #widen mult
999 sum_1 = DPROD + sum_0; #summation
1002 /* Starting from LAST_STMT, follow the defs of its uses in search
1003 of the above pattern. */
1005 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1006 &oprnd0, &oprnd1))
1007 return NULL;
1009 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1011 vect_unpromoted_value unprom_mult;
1012 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1014 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1015 we know that oprnd1 is the reduction variable (defined by a loop-header
1016 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1017 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1018 if (!oprnd0)
1019 return NULL;
1021 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1022 if (!mult_vinfo)
1023 return NULL;
1025 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1026 inside the loop (in case we are analyzing an outer-loop). */
1027 vect_unpromoted_value unprom0[2];
1028 enum optab_subtype subtype = optab_vector;
1029 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1030 false, 2, unprom0, &half_type, &subtype))
1031 return NULL;
1033 /* If there are two widening operations, make sure they agree on the sign
1034 of the extension. The result of an optab_vector_mixed_sign operation
1035 is signed; otherwise, the result has the same sign as the operands. */
1036 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1037 && (subtype == optab_vector_mixed_sign
1038 ? TYPE_UNSIGNED (unprom_mult.type)
1039 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1040 return NULL;
1042 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1044 tree half_vectype;
1045 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1046 type_out, &half_vectype, subtype))
1047 return NULL;
1049 /* Get the inputs in the appropriate types. */
1050 tree mult_oprnd[2];
1051 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1052 unprom0, half_vectype, subtype);
1054 var = vect_recog_temp_ssa_var (type, NULL);
1055 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1056 mult_oprnd[0], mult_oprnd[1], oprnd1);
1058 return pattern_stmt;
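/* A concrete source-level example of the dot-product pattern above is
   the scalar loop:
     short x[N], y[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i] * y[i];
   where the int multiplication result is exactly twice the width of
   the short inputs and sum is the reduction variable.  */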
1062 /* Function vect_recog_sad_pattern
1064 Try to find the following Sum of Absolute Difference (SAD) pattern:
1066 type x_t, y_t;
1067 signed TYPE1 diff, abs_diff;
1068 TYPE2 sum = init;
1069 loop:
1070 sum_0 = phi <init, sum_1>
1071 S1 x_t = ...
1072 S2 y_t = ...
1073 S3 x_T = (TYPE1) x_t;
1074 S4 y_T = (TYPE1) y_t;
1075 S5 diff = x_T - y_T;
1076 S6 abs_diff = ABS_EXPR <diff>;
1077 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1078 S8 sum_1 = abs_diff + sum_0;
1080 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1081 same size as 'TYPE1' or bigger. This is a special case of a reduction
1082 computation.
1084 Input:
1086 * STMT_VINFO: The stmt from which the pattern search begins. In the
1087 example, when this function is called with S8, the pattern
1088 {S3,S4,S5,S6,S7,S8} will be detected.
1090 Output:
1092 * TYPE_OUT: The type of the output of this pattern.
1094 * Return value: A new stmt that will be used to replace the sequence of
1095 stmts that constitute the pattern. In this case it will be:
1096 SAD_EXPR <x_t, y_t, sum_0>
1099 static gimple *
1100 vect_recog_sad_pattern (vec_info *vinfo,
1101 stmt_vec_info stmt_vinfo, tree *type_out)
1103 gimple *last_stmt = stmt_vinfo->stmt;
1104 tree half_type;
1106 /* Look for the following pattern
1107 DX = (TYPE1) X;
1108 DY = (TYPE1) Y;
1109 DDIFF = DX - DY;
1110 DAD = ABS_EXPR <DDIFF>;
1111 DAD = (TYPE2) DAD; #optional
1112 sum_1 = DAD + sum_0;
1113 In which
1114 - DX is at least double the size of X
1115 - DY is at least double the size of Y
1116 - DX, DY, DDIFF, DAD all have the same type
1117 - sum is the same size as DAD or bigger
1118 - sum has been recognized as a reduction variable.
1120 This is equivalent to:
1121 DDIFF = X w- Y; #widen sub
1122 DAD = ABS_EXPR <DDIFF>;
1123 sum_1 = DAD w+ sum_0; #widen summation
1125 DDIFF = X w- Y; #widen sub
1126 DAD = ABS_EXPR <DDIFF>;
1127 sum_1 = DAD + sum_0; #summation
1130 /* Starting from LAST_STMT, follow the defs of its uses in search
1131 of the above pattern. */
1133 tree plus_oprnd0, plus_oprnd1;
1134 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1135 &plus_oprnd0, &plus_oprnd1))
1136 return NULL;
1138 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1140 /* Any non-truncating sequence of conversions is OK here, since
1141 with a successful match, the result of the ABS(U) is known to fit
1142 within the nonnegative range of the result type. (It cannot be the
1143 negative of the minimum signed value due to the range of the widening
1144 MINUS_EXPR.) */
1145 vect_unpromoted_value unprom_abs;
1146 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1147 &unprom_abs);
1149 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1150 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1151 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1152 Then check that plus_oprnd0 is defined by an abs_expr. */
1154 if (!plus_oprnd0)
1155 return NULL;
1157 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1158 if (!abs_stmt_vinfo)
1159 return NULL;
1161 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1162 inside the loop (in case we are analyzing an outer-loop). */
1163 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1164 if (!abs_stmt
1165 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1166 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1167 return NULL;
1169 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1170 tree abs_type = TREE_TYPE (abs_oprnd);
1171 if (TYPE_UNSIGNED (abs_type))
1172 return NULL;
1174 /* Peel off conversions from the ABS input. This can involve sign
1175 changes (e.g. from an unsigned subtraction to a signed ABS input)
1176 or signed promotion, but it can't include unsigned promotion.
1177 (Note that ABS of an unsigned promotion should have been folded
1178 away before now anyway.) */
1179 vect_unpromoted_value unprom_diff;
1180 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1181 &unprom_diff);
1182 if (!abs_oprnd)
1183 return NULL;
1184 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1185 && TYPE_UNSIGNED (unprom_diff.type))
1186 return NULL;
1188 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1189 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1190 if (!diff_stmt_vinfo)
1191 return NULL;
1193 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1194 inside the loop (in case we are analyzing an outer-loop). */
1195 vect_unpromoted_value unprom[2];
1196 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1197 false, 2, unprom, &half_type))
1198 return NULL;
1200 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1202 tree half_vectype;
1203 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1204 type_out, &half_vectype))
1205 return NULL;
1207 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1208 tree sad_oprnd[2];
1209 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1210 unprom, half_vectype);
1212 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1213 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1214 sad_oprnd[1], plus_oprnd1);
1216 return pattern_stmt;
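/* A concrete source-level example of the SAD pattern above is the
   scalar loop:
     unsigned char x[N], y[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += abs (x[i] - y[i]);
   where the subtraction and the absolute value are computed in int,
   which is at least twice the width of the unsigned char inputs.  */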
1219 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1220 so that it can be treated as though it had the form:
1222 A_TYPE a;
1223 B_TYPE b;
1224 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1225 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1226 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1227 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1228 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1230 Try to replace the pattern with:
1232 A_TYPE a;
1233 B_TYPE b;
1234 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1235 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1236 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1237 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1239 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1241 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1242 name of the pattern being matched, for dump purposes. */
1244 static gimple *
1245 vect_recog_widen_op_pattern (vec_info *vinfo,
1246 stmt_vec_info last_stmt_info, tree *type_out,
1247 tree_code orig_code, tree_code wide_code,
1248 bool shift_p, const char *name)
1250 gimple *last_stmt = last_stmt_info->stmt;
1252 vect_unpromoted_value unprom[2];
1253 tree half_type;
1254 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1255 shift_p, 2, unprom, &half_type))
1256 return NULL;
1258 /* Pattern detected. */
1259 vect_pattern_detected (name, last_stmt);
1261 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1262 tree itype = type;
1263 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1264 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1265 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1266 TYPE_UNSIGNED (half_type));
1268 /* Check target support */
1269 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1270 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1271 enum tree_code dummy_code;
1272 int dummy_int;
1273 auto_vec<tree> dummy_vec;
1274 if (!vectype
1275 || !vecitype
1276 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1277 vecitype, vectype,
1278 &dummy_code, &dummy_code,
1279 &dummy_int, &dummy_vec))
1280 return NULL;
1282 *type_out = get_vectype_for_scalar_type (vinfo, type);
1283 if (!*type_out)
1284 return NULL;
1286 tree oprnd[2];
1287 vect_convert_inputs (vinfo, last_stmt_info,
1288 2, oprnd, half_type, unprom, vectype);
1290 tree var = vect_recog_temp_ssa_var (itype, NULL);
1291 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1292 oprnd[0], oprnd[1]);
1294 return vect_convert_output (vinfo, last_stmt_info,
1295 type, pattern_stmt, vecitype);
1298 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1299 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1301 static gimple *
1302 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1303 tree *type_out)
1305 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1306 MULT_EXPR, WIDEN_MULT_EXPR, false,
1307 "vect_recog_widen_mult_pattern");
1310 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1311 to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1313 static gimple *
1314 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1315 tree *type_out)
1317 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1318 PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1319 "vect_recog_widen_plus_pattern");
1322 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1323 to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1324 static gimple *
1325 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1326 tree *type_out)
1328 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1329 MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1330 "vect_recog_widen_minus_pattern");
1333 /* Function vect_recog_popcount_pattern
1335 Try to find the following pattern:
1337 UTYPE1 A;
1338 TYPE1 B;
1339 UTYPE2 temp_in;
1340 TYPE3 temp_out;
1341 temp_in = (UTYPE2)A;
1343 temp_out = __builtin_popcount{,l,ll} (temp_in);
1344 B = (TYPE1) temp_out;
1346 UTYPE2 may or may not be equal to TYPE3;
1347 e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount, but
1348 UTYPE2 is not equal to TYPE3 for __builtin_popcountll.
1350 Input:
1352 * STMT_VINFO: The stmt from which the pattern search begins.
1353 here it starts with B = (TYPE1) temp_out;
1355 Output:
1357 * TYPE_OUT: The vector type of the output of this pattern.
1359 * Return value: A new stmt that will be used to replace the sequence of
1360 stmts that constitute the pattern. In this case it will be:
1361 B = .POPCOUNT (A);
1364 static gimple *
1365 vect_recog_popcount_pattern (vec_info *vinfo,
1366 stmt_vec_info stmt_vinfo, tree *type_out)
1368 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1369 gimple *popcount_stmt, *pattern_stmt;
1370 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1371 auto_vec<tree> vargs;
1373 /* Find B = (TYPE1) temp_out. */
1374 if (!last_stmt)
1375 return NULL;
1376 tree_code code = gimple_assign_rhs_code (last_stmt);
1377 if (!CONVERT_EXPR_CODE_P (code))
1378 return NULL;
1380 lhs_oprnd = gimple_assign_lhs (last_stmt);
1381 lhs_type = TREE_TYPE (lhs_oprnd);
1382 if (!INTEGRAL_TYPE_P (lhs_type))
1383 return NULL;
1385 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1386 if (TREE_CODE (rhs_oprnd) != SSA_NAME
1387 || !has_single_use (rhs_oprnd))
1388 return NULL;
1389 popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1391 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
1392 if (!is_gimple_call (popcount_stmt))
1393 return NULL;
1394 switch (gimple_call_combined_fn (popcount_stmt))
1396 CASE_CFN_POPCOUNT:
1397 break;
1398 default:
1399 return NULL;
1402 if (gimple_call_num_args (popcount_stmt) != 1)
1403 return NULL;
1405 rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1406 vect_unpromoted_value unprom_diff;
1407 rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1408 &unprom_diff);
1410 if (!rhs_origin)
1411 return NULL;
1413 /* The input and output of .POPCOUNT should be integers of the same precision.
1414 Also, A should be unsigned or have the same precision as temp_in,
1415 otherwise there would be a sign extension from A to temp_in. */
1416 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1417 || (!TYPE_UNSIGNED (unprom_diff.type)
1418 && (TYPE_PRECISION (unprom_diff.type)
1419 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1420 return NULL;
1421 vargs.safe_push (unprom_diff.op);
1423 vect_pattern_detected ("vect_recog_popcount_pattern", popcount_stmt);
1424 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1425 /* Do it only if the backend has popcount<vector_mode>2 pattern. */
1426 if (!vec_type
1427 || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1428 OPTIMIZE_FOR_SPEED))
1429 return NULL;
1431 /* Create B = .POPCOUNT (A). */
1432 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1433 pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1434 gimple_call_set_lhs (pattern_stmt, new_var);
1435 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1436 *type_out = vec_type;
1438 if (dump_enabled_p ())
1439 dump_printf_loc (MSG_NOTE, vect_location,
1440 "created pattern stmt: %G", pattern_stmt);
1441 return pattern_stmt;
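/* For example, given:
     unsigned long long a;
     long long b = __builtin_popcountll (a);
   the builtin's int result is widened back to 64 bits, so the whole
   sequence can be replaced by the pattern statement b = .POPCOUNT (a),
   provided the target supports popcount on the corresponding vector
   mode.  */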
1444 /* Function vect_recog_pow_pattern
1446 Try to find the following pattern:
1448 x = POW (y, N);
1450 with POW being one of pow, powf, powi, powif and N being
1451 either 2 or 0.5.
1453 Input:
1455 * STMT_VINFO: The stmt from which the pattern search begins.
1457 Output:
1459 * TYPE_OUT: The type of the output of this pattern.
1461 * Return value: A new stmt that will be used to replace the sequence of
1462 stmts that constitute the pattern. In this case it will be:
1463 x = x * x
1465 x = sqrt (x)
1468 static gimple *
1469 vect_recog_pow_pattern (vec_info *vinfo,
1470 stmt_vec_info stmt_vinfo, tree *type_out)
1472 gimple *last_stmt = stmt_vinfo->stmt;
1473 tree base, exp;
1474 gimple *stmt;
1475 tree var;
1477 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1478 return NULL;
1480 switch (gimple_call_combined_fn (last_stmt))
1482 CASE_CFN_POW:
1483 CASE_CFN_POWI:
1484 break;
1486 default:
1487 return NULL;
1490 base = gimple_call_arg (last_stmt, 0);
1491 exp = gimple_call_arg (last_stmt, 1);
1492 if (TREE_CODE (exp) != REAL_CST
1493 && TREE_CODE (exp) != INTEGER_CST)
1495 if (flag_unsafe_math_optimizations
1496 && TREE_CODE (base) == REAL_CST
1497 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1499 combined_fn log_cfn;
1500 built_in_function exp_bfn;
1501 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1503 case BUILT_IN_POW:
1504 log_cfn = CFN_BUILT_IN_LOG;
1505 exp_bfn = BUILT_IN_EXP;
1506 break;
1507 case BUILT_IN_POWF:
1508 log_cfn = CFN_BUILT_IN_LOGF;
1509 exp_bfn = BUILT_IN_EXPF;
1510 break;
1511 case BUILT_IN_POWL:
1512 log_cfn = CFN_BUILT_IN_LOGL;
1513 exp_bfn = BUILT_IN_EXPL;
1514 break;
1515 default:
1516 return NULL;
1518 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1519 tree exp_decl = builtin_decl_implicit (exp_bfn);
1520 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1521 does that, but if C is a power of 2, we want to use
1522 exp2 (log2 (C) * x) in the non-vectorized version, but for
1523 vectorization we don't have vectorized exp2. */
1524 if (logc
1525 && TREE_CODE (logc) == REAL_CST
1526 && exp_decl
1527 && lookup_attribute ("omp declare simd",
1528 DECL_ATTRIBUTES (exp_decl)))
1530 cgraph_node *node = cgraph_node::get_create (exp_decl);
1531 if (node->simd_clones == NULL)
1533 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1534 || node->definition)
1535 return NULL;
1536 expand_simd_clones (node);
1537 if (node->simd_clones == NULL)
1538 return NULL;
1540 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1541 if (!*type_out)
1542 return NULL;
1543 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1544 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1545 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1546 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1547 g = gimple_build_call (exp_decl, 1, def);
1548 gimple_call_set_lhs (g, res);
1549 return g;
1553 return NULL;
1556 /* We now have a pow or powi builtin function call with a constant
1557 exponent. */
1559 /* Catch squaring. */
1560 if ((tree_fits_shwi_p (exp)
1561 && tree_to_shwi (exp) == 2)
1562 || (TREE_CODE (exp) == REAL_CST
1563 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1565 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1566 TREE_TYPE (base), type_out))
1567 return NULL;
1569 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1570 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1571 return stmt;
1574 /* Catch square root. */
1575 if (TREE_CODE (exp) == REAL_CST
1576 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1578 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1579 if (*type_out
1580 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1581 OPTIMIZE_FOR_SPEED))
1583 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1584 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1585 gimple_call_set_lhs (stmt, var);
1586 gimple_call_set_nothrow (stmt, true);
1587 return stmt;
1591 return NULL;
1595 /* Function vect_recog_widen_sum_pattern
1597 Try to find the following pattern:
1599 type x_t;
1600 TYPE x_T, sum = init;
1601 loop:
1602 sum_0 = phi <init, sum_1>
1603 S1 x_t = *p;
1604 S2 x_T = (TYPE) x_t;
1605 S3 sum_1 = x_T + sum_0;
1607 where type 'TYPE' is at least double the size of type 'type', i.e. we're
1608 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1609 a special case of a reduction computation.
1611 Input:
1613 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1614 when this function is called with S3, the pattern {S2,S3} will be detected.
1616 Output:
1618 * TYPE_OUT: The type of the output of this pattern.
1620 * Return value: A new stmt that will be used to replace the sequence of
1621 stmts that constitute the pattern. In this case it will be:
1622 WIDEN_SUM <x_t, sum_0>
1624 Note: The widening-sum idiom is a widening reduction pattern that is
1625 vectorized without preserving all the intermediate results. It
1626 produces only N/2 (widened) results (by summing up pairs of
1627 intermediate results) rather than all N results. Therefore, we
1628 cannot allow this pattern when we want to get all the results and in
1629 the correct order (as is the case when this computation is in an
1630 inner-loop nested in an outer-loop that is being vectorized). */
1632 static gimple *
1633 vect_recog_widen_sum_pattern (vec_info *vinfo,
1634 stmt_vec_info stmt_vinfo, tree *type_out)
1636 gimple *last_stmt = stmt_vinfo->stmt;
1637 tree oprnd0, oprnd1;
1638 tree type;
1639 gimple *pattern_stmt;
1640 tree var;
1642 /* Look for the following pattern
1643 DX = (TYPE) X;
1644 sum_1 = DX + sum_0;
1645 In which DX is at least double the size of X, and sum_1 has been
1646 recognized as a reduction variable.
1649 /* Starting from LAST_STMT, follow the defs of its uses in search
1650 of the above pattern. */
1652 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1653 &oprnd0, &oprnd1))
1654 return NULL;
1656 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1658 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1659 we know that oprnd1 is the reduction variable (defined by a loop-header
1660 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1661 Left to check that oprnd0 is defined by a cast from type 'type' to type
1662 'TYPE'. */
1664 vect_unpromoted_value unprom0;
1665 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1666 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1667 return NULL;
1669 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1671 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1672 unprom0.type, type_out))
1673 return NULL;
1675 var = vect_recog_temp_ssa_var (type, NULL);
1676 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1678 return pattern_stmt;
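/* A concrete source-level example of the widening-sum pattern above is:
     unsigned char x[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i];
   which becomes WIDEN_SUM <x_t, sum_0> when the target provides a
   WIDEN_SUM_EXPR optab for the chosen vector modes.  */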
1681 /* Recognize cases in which an operation is performed in one type WTYPE
1682 but could be done more efficiently in a narrower type NTYPE. For example,
1683 if we have:
1685 ATYPE a; // narrower than NTYPE
1686 BTYPE b; // narrower than NTYPE
1687 WTYPE aw = (WTYPE) a;
1688 WTYPE bw = (WTYPE) b;
1689 WTYPE res = aw + bw; // only uses of aw and bw
1691 then it would be more efficient to do:
1693 NTYPE an = (NTYPE) a;
1694 NTYPE bn = (NTYPE) b;
1695 NTYPE resn = an + bn;
1696 WTYPE res = (WTYPE) resn;
1698 Other situations include things like:
1700 ATYPE a; // NTYPE or narrower
1701 WTYPE aw = (WTYPE) a;
1702 WTYPE res = aw + b;
1704 when only "(NTYPE) res" is significant. In that case it's more efficient
1705 to truncate "b" and do the operation on NTYPE instead:
1707 NTYPE an = (NTYPE) a;
1708 NTYPE bn = (NTYPE) b; // truncation
1709 NTYPE resn = an + bn;
1710 WTYPE res = (WTYPE) resn;
1712 All users of "res" should then use "resn" instead, making the final
1713 statement dead (not marked as relevant). The final statement is still
1714 needed to maintain the type correctness of the IR.
1716 vect_determine_precisions has already determined the minimum
1717 precision of the operation and the minimum precision required
1718 by users of the result. */
1720 static gimple *
1721 vect_recog_over_widening_pattern (vec_info *vinfo,
1722 stmt_vec_info last_stmt_info, tree *type_out)
1724 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1725 if (!last_stmt)
1726 return NULL;
1728 /* See whether we have found that this operation can be done on a
1729 narrower type without changing its semantics. */
1730 unsigned int new_precision = last_stmt_info->operation_precision;
1731 if (!new_precision)
1732 return NULL;
1734 tree lhs = gimple_assign_lhs (last_stmt);
1735 tree type = TREE_TYPE (lhs);
1736 tree_code code = gimple_assign_rhs_code (last_stmt);
1738 /* Punt for reductions where we don't handle the type conversions. */
1739 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1740 return NULL;
1742 /* Keep the first operand of a COND_EXPR as-is: only the other two
1743 operands are interesting. */
1744 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1746 /* Check the operands. */
1747 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1748 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1749 unprom.quick_grow (nops);
1750 unsigned int min_precision = 0;
1751 bool single_use_p = false;
1752 for (unsigned int i = 0; i < nops; ++i)
1754 tree op = gimple_op (last_stmt, first_op + i);
1755 if (TREE_CODE (op) == INTEGER_CST)
1756 unprom[i].set_op (op, vect_constant_def);
1757 else if (TREE_CODE (op) == SSA_NAME)
1759 bool op_single_use_p = true;
1760 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1761 &op_single_use_p))
1762 return NULL;
1763 /* If:
1765 (1) N bits of the result are needed;
1766 (2) all inputs are widened from M<N bits; and
1767 (3) one operand OP is a single-use SSA name
1769 we can shift the M->N widening from OP to the output
1770 without changing the number or type of extensions involved.
1771 This then reduces the number of copies of STMT_INFO.
1773 If instead of (3) more than one operand is a single-use SSA name,
1774 shifting the extension to the output is even more of a win.
1776 If instead:
1778 (1) N bits of the result are needed;
1779 (2) one operand OP2 is widened from M2<N bits;
1780 (3) another operand OP1 is widened from M1<M2 bits; and
1781 (4) both OP1 and OP2 are single-use
1783 the choice is between:
1785 (a) truncating OP2 to M1, doing the operation on M1,
1786 and then widening the result to N
1788 (b) widening OP1 to M2, doing the operation on M2, and then
1789 widening the result to N
1791 Both shift the M2->N widening of the inputs to the output.
1792 (a) additionally shifts the M1->M2 widening to the output;
1793 it requires fewer copies of STMT_INFO but requires an extra
1794 M2->M1 truncation.
1796 Which is better will depend on the complexity and cost of
1797 STMT_INFO, which is hard to predict at this stage. However,
1798 a clear tie-breaker in favor of (b) is the fact that the
1799 truncation in (a) increases the length of the operation chain.
1801 If instead of (4) only one of OP1 or OP2 is single-use,
1802 (b) is still a win over doing the operation in N bits:
1803 it still shifts the M2->N widening on the single-use operand
1804 to the output and reduces the number of STMT_INFO copies.
1806 If neither operand is single-use then operating on fewer than
1807 N bits might lead to more extensions overall. Whether it does
1808 or not depends on global information about the vectorization
1809 region, and whether that's a good trade-off would again
1810 depend on the complexity and cost of the statements involved,
1811 as well as things like register pressure that are not normally
1812 modelled at this stage. We therefore ignore these cases
1813 and just optimize the clear single-use wins above.
1815 Thus we take the maximum precision of the unpromoted operands
1816 and record whether any operand is single-use. */
1817 if (unprom[i].dt == vect_internal_def)
1819 min_precision = MAX (min_precision,
1820 TYPE_PRECISION (unprom[i].type));
1821 single_use_p |= op_single_use_p;
1824 else
1825 return NULL;
1828 /* Although the operation could be done in operation_precision, we have
1829 to balance that against introducing extra truncations or extensions.
1830 Calculate the minimum precision that can be handled efficiently.
1832 The loop above determined that the operation could be handled
1833 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1834 extension from the inputs to the output without introducing more
1835 instructions, and would reduce the number of instructions required
1836 for STMT_INFO itself.
1838 vect_determine_precisions has also determined that the result only
1839 needs min_output_precision bits. Truncating by a factor of N
1840 requires a tree of N - 1 instructions, so if TYPE is N times wider
1841 than min_output_precision, doing the operation in TYPE and truncating
1842 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1843 In contrast:
1845 - truncating the input to a unary operation and doing the operation
1846 in the new type requires at most N - 1 + 1 = N instructions per
1847 output vector
1849 - doing the same for a binary operation requires at most
1850 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1852 Both unary and binary operations require fewer instructions than
1853 this if the operands were extended from a suitable truncated form.
1854 Thus there is usually nothing to lose by doing operations in
1855 min_output_precision bits, but there can be something to gain. */
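/* A worked instance of the counts above (editor's note): if TYPE is 32 bits
   wide and min_output_precision is 8, then N = 4, so operating in TYPE and
   truncating the result costs up to 2*4 - 1 = 7 instructions per output
   vector, whereas truncating the inputs first costs at most 4 (unary) or
   7 (binary) instructions, and fewer still when the inputs were already
   extended from a narrower type.  */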
1856 if (!single_use_p)
1857 min_precision = last_stmt_info->min_output_precision;
1858 else
1859 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1861 /* Apply the minimum efficient precision we just calculated. */
1862 if (new_precision < min_precision)
1863 new_precision = min_precision;
1864 new_precision = vect_element_precision (new_precision);
1865 if (new_precision >= TYPE_PRECISION (type))
1866 return NULL;
1868 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1870 *type_out = get_vectype_for_scalar_type (vinfo, type);
1871 if (!*type_out)
1872 return NULL;
1874 /* We've found a viable pattern. Get the new type of the operation. */
1875 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1876 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1878 /* If we're truncating an operation, we need to make sure that we
1879 don't introduce new undefined overflow. The codes tested here are
1880 a subset of those accepted by vect_truncatable_operation_p. */
1881 tree op_type = new_type;
1882 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1883 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1884 op_type = build_nonstandard_integer_type (new_precision, true);
1886 /* We specifically don't check here whether the target supports the
1887 new operation, since it might be something that a later pattern
1888 wants to rewrite anyway. If targets have a minimum element size
1889 for some optabs, we should pattern-match smaller ops to larger ops
1890 where beneficial. */
1891 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1892 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1893 if (!new_vectype || !op_vectype)
1894 return NULL;
1896 if (dump_enabled_p ())
1897 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1898 type, new_type);
1900 /* Calculate the rhs operands for an operation on OP_TYPE. */
1901 tree ops[3] = {};
1902 for (unsigned int i = 1; i < first_op; ++i)
1903 ops[i - 1] = gimple_op (last_stmt, i);
1904 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
1905 op_type, &unprom[0], op_vectype);
1907 /* Use the operation to produce a result of type OP_TYPE. */
1908 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1909 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1910 ops[0], ops[1], ops[2]);
1911 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1913 if (dump_enabled_p ())
1914 dump_printf_loc (MSG_NOTE, vect_location,
1915 "created pattern stmt: %G", pattern_stmt);
1917 /* Convert back to the original signedness, if OP_TYPE is different
1918 from NEW_TYPE. */
1919 if (op_type != new_type)
1920 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
1921 pattern_stmt, op_vectype);
1923 /* Promote the result to the original type. */
1924 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
1925 pattern_stmt, new_vectype);
1927 return pattern_stmt;
1930 /* Recognize the following patterns:
1932 ATYPE a; // narrower than TYPE
1933 BTYPE b; // narrower than TYPE
1935 1) Multiply high with scaling
1936 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1937 Here, c is bitsize (TYPE) / 2 - 1.
1939 2) ... or also with rounding
1940 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1941 Here, d is bitsize (TYPE) / 2 - 2.
1943 3) Normal multiply high
1944 TYPE res = ((TYPE) a * (TYPE) b) >> e;
1945 Here, e is bitsize (TYPE) / 2.
1947 where only the bottom half of res is used. */
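/* A scalar example of pattern 1) (editor's sketch), with TYPE = int32_t
   and 16-bit inputs:

     void
     f (int16_t *restrict r, const int16_t *restrict a,
	const int16_t *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = ((int32_t) a[i] * b[i]) >> 15;	/* c = 32 / 2 - 1 */
     }

   Only the low 16 bits of each shifted product are stored, so the loop can
   use IFN_MULHS where the target provides it.  */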
1949 static gimple *
1950 vect_recog_mulhs_pattern (vec_info *vinfo,
1951 stmt_vec_info last_stmt_info, tree *type_out)
1953 /* Check for a right shift. */
1954 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1955 if (!last_stmt
1956 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1957 return NULL;
1959 /* Check that the shift result is wider than the users of the
1960 result need (i.e. that narrowing would be a natural choice). */
1961 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1962 unsigned int target_precision
1963 = vect_element_precision (last_stmt_info->min_output_precision);
1964 if (!INTEGRAL_TYPE_P (lhs_type)
1965 || target_precision >= TYPE_PRECISION (lhs_type))
1966 return NULL;
1968 /* Look through any change in sign on the outer shift input. */
1969 vect_unpromoted_value unprom_rshift_input;
1970 tree rshift_input = vect_look_through_possible_promotion
1971 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1972 if (!rshift_input
1973 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1974 != TYPE_PRECISION (lhs_type))
1975 return NULL;
1977 /* Get the definition of the shift input. */
1978 stmt_vec_info rshift_input_stmt_info
1979 = vect_get_internal_def (vinfo, rshift_input);
1980 if (!rshift_input_stmt_info)
1981 return NULL;
1982 gassign *rshift_input_stmt
1983 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1984 if (!rshift_input_stmt)
1985 return NULL;
1987 stmt_vec_info mulh_stmt_info;
1988 tree scale_term;
1989 bool rounding_p = false;
1991 /* Check for the presence of the rounding term. */
1992 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1994 /* Check that the outer shift was by 1. */
1995 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1996 return NULL;
1998 /* Check that the second operand of the PLUS_EXPR is 1. */
1999 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2000 return NULL;
2002 /* Look through any change in sign on the addition input. */
2003 vect_unpromoted_value unprom_plus_input;
2004 tree plus_input = vect_look_through_possible_promotion
2005 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2006 if (!plus_input
2007 || TYPE_PRECISION (TREE_TYPE (plus_input))
2008 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
2009 return NULL;
2011 /* Get the definition of the multiply-high-scale part. */
2012 stmt_vec_info plus_input_stmt_info
2013 = vect_get_internal_def (vinfo, plus_input);
2014 if (!plus_input_stmt_info)
2015 return NULL;
2016 gassign *plus_input_stmt
2017 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2018 if (!plus_input_stmt
2019 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2020 return NULL;
2022 /* Look through any change in sign on the scaling input. */
2023 vect_unpromoted_value unprom_scale_input;
2024 tree scale_input = vect_look_through_possible_promotion
2025 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2026 if (!scale_input
2027 || TYPE_PRECISION (TREE_TYPE (scale_input))
2028 != TYPE_PRECISION (TREE_TYPE (plus_input)))
2029 return NULL;
2031 /* Get the definition of the multiply-high part. */
2032 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2033 if (!mulh_stmt_info)
2034 return NULL;
2036 /* Get the scaling term. */
2037 scale_term = gimple_assign_rhs2 (plus_input_stmt);
2038 rounding_p = true;
2040 else
2042 mulh_stmt_info = rshift_input_stmt_info;
2043 scale_term = gimple_assign_rhs2 (last_stmt);
2046 /* Check that the scaling factor is constant. */
2047 if (TREE_CODE (scale_term) != INTEGER_CST)
2048 return NULL;
2050 /* Check whether the scaling input term can be seen as two widened
2051 inputs multiplied together. */
2052 vect_unpromoted_value unprom_mult[2];
2053 tree new_type;
2054 unsigned int nops
2055 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2056 false, 2, unprom_mult, &new_type);
2057 if (nops != 2)
2058 return NULL;
2060 /* Adjust output precision. */
2061 if (TYPE_PRECISION (new_type) < target_precision)
2062 new_type = build_nonstandard_integer_type
2063 (target_precision, TYPE_UNSIGNED (new_type));
2065 unsigned mult_precision = TYPE_PRECISION (new_type);
2066 internal_fn ifn;
2067 /* Check that the scaling factor is as expected. Instead of
2068 target_precision, we use the precision that the internal
2069 function will actually operate on. */
2070 if (rounding_p)
2072 /* Check pattern 2). */
2073 if (wi::to_widest (scale_term) + mult_precision + 2
2074 != TYPE_PRECISION (lhs_type))
2075 return NULL;
2077 ifn = IFN_MULHRS;
2079 else
2081 /* Check for pattern 1). */
2082 if (wi::to_widest (scale_term) + mult_precision + 1
2083 == TYPE_PRECISION (lhs_type))
2084 ifn = IFN_MULHS;
2085 /* Check for pattern 3). */
2086 else if (wi::to_widest (scale_term) + mult_precision
2087 == TYPE_PRECISION (lhs_type))
2088 ifn = IFN_MULH;
2089 else
2090 return NULL;
2093 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2095 /* Check for target support. */
2096 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2097 if (!new_vectype
2098 || !direct_internal_fn_supported_p
2099 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2100 return NULL;
2102 /* The IR requires a valid vector type for the cast result, even though
2103 it's likely to be discarded. */
2104 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2105 if (!*type_out)
2106 return NULL;
2108 /* Generate the internal-function call (IFN_MULH, IFN_MULHS or IFN_MULHRS). */
2109 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2110 tree new_ops[2];
2111 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2112 unprom_mult, new_vectype);
2113 gcall *mulhrs_stmt
2114 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
2115 gimple_call_set_lhs (mulhrs_stmt, new_var);
2116 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
2118 if (dump_enabled_p ())
2119 dump_printf_loc (MSG_NOTE, vect_location,
2120 "created pattern stmt: %G", mulhrs_stmt);
2122 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
2123 mulhrs_stmt, new_vectype);
2126 /* Recognize the patterns:
2128 ATYPE a; // narrower than TYPE
2129 BTYPE b; // narrower than TYPE
2130 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2131 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2133 where only the bottom half of avg is used. Try to transform them into:
2135 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2136 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2138 followed by:
2140 TYPE avg = (TYPE) avg';
2142 where NTYPE is no wider than half of TYPE. Since only the bottom half
2143 of avg is used, all or part of the cast of avg' should become redundant.
2145 If there is no target support available, generate code to distribute rshift
2146 over plus and add a carry. */
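/* A scalar example of pattern (2) (editor's sketch):

     void
     f (uint8_t *restrict r, const uint8_t *restrict a,
	const uint8_t *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
	 r[i] = (a[i] + b[i] + 1) >> 1;	/* promoted to int, stored as uint8_t */
     }

   Only the low 8 bits of each result are used, so the computation can be
   performed as .AVG_CEIL on unsigned char elements.  */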
2148 static gimple *
2149 vect_recog_average_pattern (vec_info *vinfo,
2150 stmt_vec_info last_stmt_info, tree *type_out)
2152 /* Check for a shift right by one bit. */
2153 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2154 if (!last_stmt
2155 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
2156 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
2157 return NULL;
2159 /* Check that the shift result is wider than the users of the
2160 result need (i.e. that narrowing would be a natural choice). */
2161 tree lhs = gimple_assign_lhs (last_stmt);
2162 tree type = TREE_TYPE (lhs);
2163 unsigned int target_precision
2164 = vect_element_precision (last_stmt_info->min_output_precision);
2165 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2166 return NULL;
2168 /* Look through any change in sign on the shift input. */
2169 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2170 vect_unpromoted_value unprom_plus;
2171 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2172 &unprom_plus);
2173 if (!rshift_rhs
2174 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2175 return NULL;
2177 /* Get the definition of the shift input. */
2178 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2179 if (!plus_stmt_info)
2180 return NULL;
2182 /* Check whether the shift input can be seen as a tree of additions on
2183 2 or 3 widened inputs.
2185 Note that the pattern should be a win even if the result of one or
2186 more additions is reused elsewhere: if the pattern matches, we'd be
2187 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2188 internal_fn ifn = IFN_AVG_FLOOR;
2189 vect_unpromoted_value unprom[3];
2190 tree new_type;
2191 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2192 WIDEN_PLUS_EXPR, false, 3,
2193 unprom, &new_type);
2194 if (nops == 0)
2195 return NULL;
2196 if (nops == 3)
2198 /* Check that one operand is 1. */
2199 unsigned int i;
2200 for (i = 0; i < 3; ++i)
2201 if (integer_onep (unprom[i].op))
2202 break;
2203 if (i == 3)
2204 return NULL;
2205 /* Throw away the 1 operand and keep the other two. */
2206 if (i < 2)
2207 unprom[i] = unprom[2];
2208 ifn = IFN_AVG_CEIL;
2211 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2213 /* We know that:
2215 (a) the operation can be viewed as:
2217 TYPE widened0 = (TYPE) UNPROM[0];
2218 TYPE widened1 = (TYPE) UNPROM[1];
2219 TYPE tmp1 = widened0 + widened1 {+ 1};
2220 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2222 (b) the first two statements are equivalent to:
2224 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2225 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2227 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2228 where sensible;
2230 (d) all the operations can be performed correctly at twice the width of
2231 NEW_TYPE, due to the nature of the average operation; and
2233 (e) users of the result of the right shift need only TARGET_PRECISION
2234 bits, where TARGET_PRECISION is no more than half of TYPE's
2235 precision.
2237 Under these circumstances, the only situation in which NEW_TYPE
2238 could be narrower than TARGET_PRECISION is if widened0, widened1
2239 and an addition result are all used more than once. Thus we can
2240 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2241 as "free", whereas widening the result of the average instruction
2242 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2243 therefore better not to go narrower than TARGET_PRECISION. */
2244 if (TYPE_PRECISION (new_type) < target_precision)
2245 new_type = build_nonstandard_integer_type (target_precision,
2246 TYPE_UNSIGNED (new_type));
2248 /* Check for target support. */
2249 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2250 if (!new_vectype)
2251 return NULL;
2253 bool fallback_p = false;
2255 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2257 else if (TYPE_UNSIGNED (new_type)
2258 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2259 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2260 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2261 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2262 fallback_p = true;
2263 else
2264 return NULL;
2266 /* The IR requires a valid vector type for the cast result, even though
2267 it's likely to be discarded. */
2268 *type_out = get_vectype_for_scalar_type (vinfo, type);
2269 if (!*type_out)
2270 return NULL;
2272 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2273 tree new_ops[2];
2274 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2275 unprom, new_vectype);
2277 if (fallback_p)
2279 /* As a fallback, generate code for following sequence:
2281 shifted_op0 = new_ops[0] >> 1;
2282 shifted_op1 = new_ops[1] >> 1;
2283 sum_of_shifted = shifted_op0 + shifted_op1;
2284 unmasked_carry = new_ops[0] and/or new_ops[1];
2285 carry = unmasked_carry & 1;
2286 new_var = sum_of_shifted + carry;
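	 This is correct because a + b = 2*(a >> 1) + 2*(b >> 1) + (a & 1) + (b & 1),
	 so (a + b) >> 1     == (a >> 1) + (b >> 1) + ((a & b) & 1)  [.AVG_FLOOR]
	 and (a + b + 1) >> 1 == (a >> 1) + (b >> 1) + ((a | b) & 1)  [.AVG_CEIL],
	 which is why the unmasked carry below uses BIT_AND_EXPR or BIT_IOR_EXPR
	 respectively.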
2289 tree one_cst = build_one_cst (new_type);
2290 gassign *g;
2292 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2293 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2294 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2296 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2297 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2298 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2300 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2301 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2302 shifted_op0, shifted_op1);
2303 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2305 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2306 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2307 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2308 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2310 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2311 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2312 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2314 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2315 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2318 /* Generate the IFN_AVG* call. */
2319 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2320 new_ops[1]);
2321 gimple_call_set_lhs (average_stmt, new_var);
2322 gimple_set_location (average_stmt, gimple_location (last_stmt));
2324 if (dump_enabled_p ())
2325 dump_printf_loc (MSG_NOTE, vect_location,
2326 "created pattern stmt: %G", average_stmt);
2328 return vect_convert_output (vinfo, last_stmt_info,
2329 type, average_stmt, new_vectype);
2332 /* Recognize cases in which the input to a cast is wider than its
2333 output, and the input is fed by a widening operation. Fold this
2334 by removing the unnecessary intermediate widening. E.g.:
2336 unsigned char a;
2337 unsigned int b = (unsigned int) a;
2338 unsigned short c = (unsigned short) b;
2342 unsigned short c = (unsigned short) a;
2344 Although this is rare in input IR, it is an expected side-effect
2345 of the over-widening pattern above.
2347 This is beneficial also for integer-to-float conversions, if the
2348 widened integer has more bits than the float, and if the unwidened
2349 input doesn't. */
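/* An example of the integer-to-float case (editor's sketch):

     unsigned int a;
     unsigned long long b = (unsigned long long) a;
     float c = (float) b;

   can be folded to "float c = (float) a;", because the 64-bit intermediate
   is wider than float's 32 bits while the original 32-bit input is not.  */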
2351 static gimple *
2352 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2353 stmt_vec_info last_stmt_info, tree *type_out)
2355 /* Check for a cast, including an integer-to-float conversion. */
2356 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2357 if (!last_stmt)
2358 return NULL;
2359 tree_code code = gimple_assign_rhs_code (last_stmt);
2360 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2361 return NULL;
2363 /* Make sure that the rhs is a scalar with a natural bitsize. */
2364 tree lhs = gimple_assign_lhs (last_stmt);
2365 if (!lhs)
2366 return NULL;
2367 tree lhs_type = TREE_TYPE (lhs);
2368 scalar_mode lhs_mode;
2369 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2370 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2371 return NULL;
2373 /* Check for a narrowing operation (from a vector point of view). */
2374 tree rhs = gimple_assign_rhs1 (last_stmt);
2375 tree rhs_type = TREE_TYPE (rhs);
2376 if (!INTEGRAL_TYPE_P (rhs_type)
2377 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2378 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2379 return NULL;
2381 /* Try to find an unpromoted input. */
2382 vect_unpromoted_value unprom;
2383 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2384 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2385 return NULL;
2387 /* If the bits above RHS_TYPE matter, make sure that they're the
2388 same when extending from UNPROM as they are when extending from RHS. */
2389 if (!INTEGRAL_TYPE_P (lhs_type)
2390 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2391 return NULL;
2393 /* We can get the same result by casting UNPROM directly, to avoid
2394 the unnecessary widening and narrowing. */
2395 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2397 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2398 if (!*type_out)
2399 return NULL;
2401 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2402 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2403 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2405 return pattern_stmt;
2408 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2409 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2411 static gimple *
2412 vect_recog_widen_shift_pattern (vec_info *vinfo,
2413 stmt_vec_info last_stmt_info, tree *type_out)
2415 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2416 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2417 "vect_recog_widen_shift_pattern");
2420 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2422 type a_t, b_t, c_t;
2424 S0 a_t = b_t r<< c_t;
2426 Input/Output:
2428 * STMT_VINFO: The stmt from which the pattern search begins,
2429 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2430 with a sequence:
2432 S1 d_t = -c_t;
2433 S2 e_t = d_t & (B - 1);
2434 S3 f_t = b_t << c_t;
2435 S4 g_t = b_t >> e_t;
2436 S0 a_t = f_t | g_t;
2438 where B is element bitsize of type.
2440 Output:
2442 * TYPE_OUT: The type of the output of this pattern.
2444 * Return value: A new stmt that will be used to replace the rotate
2445 S0 stmt. */
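/* A branchless scalar equivalent of S1-S4/S0 (editor's sketch), for an
   8-bit rotate by a variable amount 0 <= c < 8:

     static inline unsigned char
     rotl8 (unsigned char b, unsigned int c)
     {
       unsigned int e = (0U - c) & 7;			/* S1, S2 */
       return (unsigned char) ((b << c) | (b >> e));	/* S3, S4, S0 */
     }

   For c == 0 the two shifted halves coincide, so the OR still yields b.  */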
2447 static gimple *
2448 vect_recog_rotate_pattern (vec_info *vinfo,
2449 stmt_vec_info stmt_vinfo, tree *type_out)
2451 gimple *last_stmt = stmt_vinfo->stmt;
2452 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2453 gimple *pattern_stmt, *def_stmt;
2454 enum tree_code rhs_code;
2455 enum vect_def_type dt;
2456 optab optab1, optab2;
2457 edge ext_def = NULL;
2458 bool bswap16_p = false;
2460 if (is_gimple_assign (last_stmt))
2462 rhs_code = gimple_assign_rhs_code (last_stmt);
2463 switch (rhs_code)
2465 case LROTATE_EXPR:
2466 case RROTATE_EXPR:
2467 break;
2468 default:
2469 return NULL;
2472 lhs = gimple_assign_lhs (last_stmt);
2473 oprnd0 = gimple_assign_rhs1 (last_stmt);
2474 type = TREE_TYPE (oprnd0);
2475 oprnd1 = gimple_assign_rhs2 (last_stmt);
2477 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2479 /* __builtin_bswap16 (x) is another form of x r>> 8.
2480 The vectorizer has bswap support, but only if the argument isn't
2481 promoted. */
2482 lhs = gimple_call_lhs (last_stmt);
2483 oprnd0 = gimple_call_arg (last_stmt, 0);
2484 type = TREE_TYPE (oprnd0);
2485 if (!lhs
2486 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2487 || TYPE_PRECISION (type) <= 16
2488 || TREE_CODE (oprnd0) != SSA_NAME
2489 || BITS_PER_UNIT != 8
2490 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2491 return NULL;
2493 stmt_vec_info def_stmt_info;
2494 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2495 return NULL;
2497 if (dt != vect_internal_def)
2498 return NULL;
2500 if (gimple_assign_cast_p (def_stmt))
2502 def = gimple_assign_rhs1 (def_stmt);
2503 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2504 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2505 oprnd0 = def;
2508 type = TREE_TYPE (lhs);
2509 vectype = get_vectype_for_scalar_type (vinfo, type);
2510 if (vectype == NULL_TREE)
2511 return NULL;
2513 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2515 /* The encoding uses one stepped pattern for each byte in the
2516 16-bit word. */
2517 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2518 for (unsigned i = 0; i < 3; ++i)
2519 for (unsigned j = 0; j < 2; ++j)
2520 elts.quick_push ((i + 1) * 2 - j - 1);
2522 vec_perm_indices indices (elts, 1,
2523 TYPE_VECTOR_SUBPARTS (char_vectype));
2524 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2526 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2527 undo the argument promotion. */
2528 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2530 def = vect_recog_temp_ssa_var (type, NULL);
2531 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2532 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2533 oprnd0 = def;
2536 /* Pattern detected. */
2537 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2539 *type_out = vectype;
2541 /* Pattern supported. Create a stmt to be used to replace the
2542 pattern, with the unpromoted argument. */
2543 var = vect_recog_temp_ssa_var (type, NULL);
2544 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2545 1, oprnd0);
2546 gimple_call_set_lhs (pattern_stmt, var);
2547 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2548 gimple_call_fntype (last_stmt));
2549 return pattern_stmt;
2553 oprnd1 = build_int_cst (integer_type_node, 8);
2554 rhs_code = LROTATE_EXPR;
2555 bswap16_p = true;
2557 else
2558 return NULL;
2560 if (TREE_CODE (oprnd0) != SSA_NAME
2561 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2562 || !INTEGRAL_TYPE_P (type)
2563 || !TYPE_UNSIGNED (type))
2564 return NULL;
2566 stmt_vec_info def_stmt_info;
2567 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2568 return NULL;
2570 if (dt != vect_internal_def
2571 && dt != vect_constant_def
2572 && dt != vect_external_def)
2573 return NULL;
2575 vectype = get_vectype_for_scalar_type (vinfo, type);
2576 if (vectype == NULL_TREE)
2577 return NULL;
2579 /* If vector/vector or vector/scalar rotate is supported by the target,
2580 don't do anything here. */
2581 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2582 if (optab1
2583 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2585 use_rotate:
2586 if (bswap16_p)
2588 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2590 def = vect_recog_temp_ssa_var (type, NULL);
2591 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2592 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2593 oprnd0 = def;
2596 /* Pattern detected. */
2597 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2599 *type_out = vectype;
2601 /* Pattern supported. Create a stmt to be used to replace the
2602 pattern. */
2603 var = vect_recog_temp_ssa_var (type, NULL);
2604 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2605 oprnd1);
2606 return pattern_stmt;
2608 return NULL;
2611 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2613 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2614 if (optab2
2615 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2616 goto use_rotate;
2619 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2620 don't do anything here either. */
2621 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2622 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2623 if (!optab1
2624 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2625 || !optab2
2626 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2628 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2629 return NULL;
2630 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2631 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2632 if (!optab1
2633 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2634 || !optab2
2635 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2636 return NULL;
2639 *type_out = vectype;
2641 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2643 def = vect_recog_temp_ssa_var (type, NULL);
2644 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2645 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2646 oprnd0 = def;
2649 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2650 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2652 def = NULL_TREE;
2653 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2654 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2655 def = oprnd1;
2656 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2658 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2659 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2660 && TYPE_PRECISION (TREE_TYPE (rhs1))
2661 == TYPE_PRECISION (type))
2662 def = rhs1;
2665 if (def == NULL_TREE)
2667 def = vect_recog_temp_ssa_var (type, NULL);
2668 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2669 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2671 stype = TREE_TYPE (def);
2673 if (TREE_CODE (def) == INTEGER_CST)
2675 if (!tree_fits_uhwi_p (def)
2676 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2677 || integer_zerop (def))
2678 return NULL;
2679 def2 = build_int_cst (stype,
2680 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2682 else
2684 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2686 if (vecstype == NULL_TREE)
2687 return NULL;
2688 def2 = vect_recog_temp_ssa_var (stype, NULL);
2689 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2690 if (ext_def)
2692 basic_block new_bb
2693 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2694 gcc_assert (!new_bb);
2696 else
2697 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2699 def2 = vect_recog_temp_ssa_var (stype, NULL);
2700 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2701 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2702 gimple_assign_lhs (def_stmt), mask);
2703 if (ext_def)
2705 basic_block new_bb
2706 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2707 gcc_assert (!new_bb);
2709 else
2710 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2713 var1 = vect_recog_temp_ssa_var (type, NULL);
2714 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2715 ? LSHIFT_EXPR : RSHIFT_EXPR,
2716 oprnd0, def);
2717 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2719 var2 = vect_recog_temp_ssa_var (type, NULL);
2720 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2721 ? RSHIFT_EXPR : LSHIFT_EXPR,
2722 oprnd0, def2);
2723 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2725 /* Pattern detected. */
2726 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2728 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2729 var = vect_recog_temp_ssa_var (type, NULL);
2730 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2732 return pattern_stmt;
2735 /* Detect a vector-by-vector shift pattern that wouldn't otherwise be
2736 vectorized:
2738 type a_t;
2739 TYPE b_T, res_T;
2741 S1 a_t = ;
2742 S2 b_T = ;
2743 S3 res_T = b_T op a_t;
2745 where 'TYPE' is a type with a different size than 'type',
2746 and op is <<, >> or rotate.
2748 Also detect cases:
2750 type a_t;
2751 TYPE b_T, c_T, res_T;
2753 S0 c_T = ;
2754 S1 a_t = (type) c_T;
2755 S2 b_T = ;
2756 S3 res_T = b_T op a_t;
2758 Input/Output:
2760 * STMT_VINFO: The stmt from which the pattern search begins,
2761 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2762 with a shift/rotate which has same type on both operands, in the
2763 second case just b_T op c_T, in the first case with added cast
2764 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2766 Output:
2768 * TYPE_OUT: The type of the output of this pattern.
2770 * Return value: A new stmt that will be used to replace the shift/rotate
2771 S3 stmt. */
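/* A scalar source example (editor's sketch): shifting 64-bit values by
   32-bit counts,

     void
     f (unsigned long long *restrict x, const int *restrict n, int cnt)
     {
       for (int i = 0; i < cnt; i++)
	 x[i] <<= n[i];
     }

   matches S3 above with TYPE = unsigned long long and type = int; the
   pattern inserts a cast of the count to TYPE so that a vector-by-vector
   shift can be used.  */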
2773 static gimple *
2774 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2775 stmt_vec_info stmt_vinfo,
2776 tree *type_out)
2778 gimple *last_stmt = stmt_vinfo->stmt;
2779 tree oprnd0, oprnd1, lhs, var;
2780 gimple *pattern_stmt;
2781 enum tree_code rhs_code;
2783 if (!is_gimple_assign (last_stmt))
2784 return NULL;
2786 rhs_code = gimple_assign_rhs_code (last_stmt);
2787 switch (rhs_code)
2789 case LSHIFT_EXPR:
2790 case RSHIFT_EXPR:
2791 case LROTATE_EXPR:
2792 case RROTATE_EXPR:
2793 break;
2794 default:
2795 return NULL;
2798 lhs = gimple_assign_lhs (last_stmt);
2799 oprnd0 = gimple_assign_rhs1 (last_stmt);
2800 oprnd1 = gimple_assign_rhs2 (last_stmt);
2801 if (TREE_CODE (oprnd0) != SSA_NAME
2802 || TREE_CODE (oprnd1) != SSA_NAME
2803 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2804 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2805 || TYPE_PRECISION (TREE_TYPE (lhs))
2806 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2807 return NULL;
2809 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2810 if (!def_vinfo)
2811 return NULL;
2813 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2814 if (*type_out == NULL_TREE)
2815 return NULL;
2817 tree def = NULL_TREE;
2818 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2819 if (def_stmt && gimple_assign_cast_p (def_stmt))
2821 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2822 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2823 && TYPE_PRECISION (TREE_TYPE (rhs1))
2824 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2826 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2827 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2828 def = rhs1;
2829 else
2831 tree mask
2832 = build_low_bits_mask (TREE_TYPE (rhs1),
2833 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2834 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2835 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2836 tree vecstype = get_vectype_for_scalar_type (vinfo,
2837 TREE_TYPE (rhs1));
2838 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2843 if (def == NULL_TREE)
2845 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2846 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2847 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2850 /* Pattern detected. */
2851 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2853 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2854 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2855 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2857 return pattern_stmt;
2860 /* Return true iff the target has a vector optab implementing the operation
2861 CODE on type VECTYPE. */
2863 static bool
2864 target_has_vecop_for_code (tree_code code, tree vectype)
2866 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2867 return voptab
2868 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2871 /* Verify that the target has optabs of VECTYPE to perform all the steps
2872 needed by the multiplication-by-immediate synthesis algorithm described by
2873 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2874 present. Return true iff the target supports all the steps. */
2876 static bool
2877 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2878 tree vectype, bool synth_shift_p)
2880 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2881 return false;
2883 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2884 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2886 if (var == negate_variant
2887 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2888 return false;
2890 /* If we must synthesize shifts with additions, make sure that vector
2891 addition is available. */
2892 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2893 return false;
2895 for (int i = 1; i < alg->ops; i++)
2897 switch (alg->op[i])
2899 case alg_shift:
2900 break;
2901 case alg_add_t_m2:
2902 case alg_add_t2_m:
2903 case alg_add_factor:
2904 if (!supports_vplus)
2905 return false;
2906 break;
2907 case alg_sub_t_m2:
2908 case alg_sub_t2_m:
2909 case alg_sub_factor:
2910 if (!supports_vminus)
2911 return false;
2912 break;
2913 case alg_unknown:
2914 case alg_m:
2915 case alg_zero:
2916 case alg_impossible:
2917 return false;
2918 default:
2919 gcc_unreachable ();
2923 return true;
2926 /* Synthesize a left shift of OP by AMNT bits using a series of additions,
2927 putting the final result in DEST. Append all statements but the last to
2928 the pattern definition sequence of STMT_INFO. Return the last statement. */
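/* For instance (editor's sketch), OP << 3 is emitted as:

     t1 = OP + OP;	// OP << 1
     t2 = t1 + t1;	// OP << 2
     DEST = t2 + t2;	// OP << 3  */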
2930 static gimple *
2931 synth_lshift_by_additions (vec_info *vinfo,
2932 tree dest, tree op, HOST_WIDE_INT amnt,
2933 stmt_vec_info stmt_info)
2935 HOST_WIDE_INT i;
2936 tree itype = TREE_TYPE (op);
2937 tree prev_res = op;
2938 gcc_assert (amnt >= 0);
2939 for (i = 0; i < amnt; i++)
2941 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2942 : dest;
2943 gimple *stmt
2944 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2945 prev_res = tmp_var;
2946 if (i < amnt - 1)
2947 append_pattern_def_seq (vinfo, stmt_info, stmt);
2948 else
2949 return stmt;
2951 gcc_unreachable ();
2952 return NULL;
2955 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2956 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2957 the process if necessary. Append the resulting assignment statements
2958 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2959 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2960 left shifts using additions. */
2962 static tree
2963 apply_binop_and_append_stmt (vec_info *vinfo,
2964 tree_code code, tree op1, tree op2,
2965 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2967 if (integer_zerop (op2)
2968 && (code == LSHIFT_EXPR
2969 || code == PLUS_EXPR))
2971 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2972 return op1;
2975 gimple *stmt;
2976 tree itype = TREE_TYPE (op1);
2977 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2979 if (code == LSHIFT_EXPR
2980 && synth_shift_p)
2982 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
2983 TREE_INT_CST_LOW (op2), stmt_vinfo);
2984 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2985 return tmp_var;
2988 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2989 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
2990 return tmp_var;
2993 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2994 and simple arithmetic operations to be vectorized. Record the statements
2995 produced in STMT_VINFO and return the last statement in the sequence or
2996 NULL if it's not possible to synthesize such a multiplication.
2997 This function mirrors the behavior of expand_mult_const in expmed.c but
2998 works on tree-ssa form. */
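/* For example (editor's sketch), a multiplication by 9 might be synthesized
   as

     t1 = op << 3;	/* or three self-additions if shifts are synthesized */
     result = t1 + op;

   with the exact sequence chosen by choose_mult_variant.  */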
3000 static gimple *
3001 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3002 stmt_vec_info stmt_vinfo)
3004 tree itype = TREE_TYPE (op);
3005 machine_mode mode = TYPE_MODE (itype);
3006 struct algorithm alg;
3007 mult_variant variant;
3008 if (!tree_fits_shwi_p (val))
3009 return NULL;
3011 /* Multiplication synthesis by shifts, adds and subs can introduce
3012 signed overflow where the original operation didn't. Perform the
3013 operations on an unsigned type and cast back to avoid this.
3014 In the future we may want to relax this for synthesis algorithms
3015 that we can prove do not cause unexpected overflow. */
3016 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3018 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3020 /* Targets that don't support vector shifts but support vector additions
3021 can synthesize shifts that way. */
3022 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3024 HOST_WIDE_INT hwval = tree_to_shwi (val);
3025 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3026 The vectorizer's benefit analysis will decide whether it's beneficial
3027 to do this. */
3028 bool possible = choose_mult_variant (mode, hwval, &alg,
3029 &variant, MAX_COST);
3030 if (!possible)
3031 return NULL;
3033 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3035 if (!vectype
3036 || !target_supports_mult_synth_alg (&alg, variant,
3037 vectype, synth_shift_p))
3038 return NULL;
3040 tree accumulator;
3042 /* Clear out the sequence of statements so we can populate it below. */
3043 gimple *stmt = NULL;
3045 if (cast_to_unsigned_p)
3047 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3048 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3049 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3050 op = tmp_op;
3053 if (alg.op[0] == alg_zero)
3054 accumulator = build_int_cst (multtype, 0);
3055 else
3056 accumulator = op;
3058 bool needs_fixup = (variant == negate_variant)
3059 || (variant == add_variant);
3061 for (int i = 1; i < alg.ops; i++)
3063 tree shft_log = build_int_cst (multtype, alg.log[i]);
3064 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3065 tree tmp_var = NULL_TREE;
3067 switch (alg.op[i])
3069 case alg_shift:
3070 if (synth_shift_p)
3071 stmt
3072 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3073 alg.log[i], stmt_vinfo);
3074 else
3075 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3076 shft_log);
3077 break;
3078 case alg_add_t_m2:
3079 tmp_var
3080 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3081 stmt_vinfo, synth_shift_p);
3082 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3083 tmp_var);
3084 break;
3085 case alg_sub_t_m2:
3086 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3087 shft_log, stmt_vinfo,
3088 synth_shift_p);
3089 /* In some algorithms the first step involves zeroing the
3090 accumulator. If subtracting from such an accumulator,
3091 just emit the negation directly. */
3092 if (integer_zerop (accumulator))
3093 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3094 else
3095 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3096 tmp_var);
3097 break;
3098 case alg_add_t2_m:
3099 tmp_var
3100 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3101 shft_log, stmt_vinfo, synth_shift_p);
3102 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3103 break;
3104 case alg_sub_t2_m:
3105 tmp_var
3106 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3107 shft_log, stmt_vinfo, synth_shift_p);
3108 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3109 break;
3110 case alg_add_factor:
3111 tmp_var
3112 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3113 shft_log, stmt_vinfo, synth_shift_p);
3114 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3115 tmp_var);
3116 break;
3117 case alg_sub_factor:
3118 tmp_var
3119 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3120 shft_log, stmt_vinfo, synth_shift_p);
3121 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3122 accumulator);
3123 break;
3124 default:
3125 gcc_unreachable ();
3127 /* We don't want to append the last stmt in the sequence to stmt_vinfo
3128 but rather return it directly. */
3130 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3131 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3132 accumulator = accum_tmp;
3134 if (variant == negate_variant)
3136 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3137 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3138 accumulator = accum_tmp;
3139 if (cast_to_unsigned_p)
3140 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3142 else if (variant == add_variant)
3144 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3145 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3146 accumulator = accum_tmp;
3147 if (cast_to_unsigned_p)
3148 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3150 /* Move back to a signed type if needed. */
3151 if (cast_to_unsigned_p)
3153 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3154 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3157 return stmt;
3160 /* Detect multiplication by a constant and convert it into a sequence of
3161 shifts, additions, subtractions and negations. We reuse the
3162 choose_mult_variant algorithms from expmed.c.
3164 Input/Output:
3166 STMT_VINFO: The stmt from which the pattern search begins,
3167 i.e. the mult stmt.
3169 Output:
3171 * TYPE_OUT: The type of the output of this pattern.
3173 * Return value: A new stmt that will be used to replace
3174 the multiplication. */
3176 static gimple *
3177 vect_recog_mult_pattern (vec_info *vinfo,
3178 stmt_vec_info stmt_vinfo, tree *type_out)
3180 gimple *last_stmt = stmt_vinfo->stmt;
3181 tree oprnd0, oprnd1, vectype, itype;
3182 gimple *pattern_stmt;
3184 if (!is_gimple_assign (last_stmt))
3185 return NULL;
3187 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3188 return NULL;
3190 oprnd0 = gimple_assign_rhs1 (last_stmt);
3191 oprnd1 = gimple_assign_rhs2 (last_stmt);
3192 itype = TREE_TYPE (oprnd0);
3194 if (TREE_CODE (oprnd0) != SSA_NAME
3195 || TREE_CODE (oprnd1) != INTEGER_CST
3196 || !INTEGRAL_TYPE_P (itype)
3197 || !type_has_mode_precision_p (itype))
3198 return NULL;
3200 vectype = get_vectype_for_scalar_type (vinfo, itype);
3201 if (vectype == NULL_TREE)
3202 return NULL;
3204 /* If the target can handle vectorized multiplication natively,
3205 don't attempt to optimize this. */
3206 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3207 if (mul_optab != unknown_optab)
3209 machine_mode vec_mode = TYPE_MODE (vectype);
3210 int icode = (int) optab_handler (mul_optab, vec_mode);
3211 if (icode != CODE_FOR_nothing)
3212 return NULL;
3215 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3216 oprnd0, oprnd1, stmt_vinfo);
3217 if (!pattern_stmt)
3218 return NULL;
3220 /* Pattern detected. */
3221 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3223 *type_out = vectype;
3225 return pattern_stmt;
3228 /* Detect a signed division by a constant that wouldn't be
3229 otherwise vectorized:
3231 type a_t, b_t;
3233 S1 a_t = b_t / N;
3235 where type 'type' is an integral type and N is a constant.
3237 Similarly handle modulo by a constant:
3239 S4 a_t = b_t % N;
3241 Input/Output:
3243 * STMT_VINFO: The stmt from which the pattern search begins,
3244 i.e. the division stmt. S1 is replaced by the following if N is a
3245 power-of-two constant and the type is signed:
3246 S3 y_t = b_t < 0 ? N - 1 : 0;
3247 S2 x_t = b_t + y_t;
3248 S1' a_t = x_t >> log2 (N);
3250 S4 is replaced by the following if N is a power-of-two constant
3251 and the type is signed (where *_T temporaries have unsigned type):
3252 S9 y_T = b_t < 0 ? -1U : 0U;
3253 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3254 S7 z_t = (type) z_T;
3255 S6 w_t = b_t + z_t;
3256 S5 x_t = w_t & (N - 1);
3257 S4' a_t = x_t - z_t;
3259 Output:
3261 * TYPE_OUT: The type of the output of this pattern.
3263 * Return value: A new stmt that will be used to replace the division
3264 S1 or modulo S4 stmt. */
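/* A worked instance (editor's sketch, assuming 32-bit int): for
   "a_t = b_t / 4" this produces

     S3   y_t = b_t < 0 ? 3 : 0;
     S2   x_t = b_t + y_t;
     S1'  a_t = x_t >> 2;

   and for "a_t = b_t % 4"

     S9   y_T = b_t < 0 ? -1U : 0U;
     S8   z_T = y_T >> 30;		/* 32 - log2 (4) */
     S7   z_t = (int) z_T;
     S6   w_t = b_t + z_t;
     S5   x_t = w_t & 3;
     S4'  a_t = x_t - z_t;

   e.g. b_t == -5 gives a_t == -1 in both cases, matching C's truncating
   division and remainder.  */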
3266 static gimple *
3267 vect_recog_divmod_pattern (vec_info *vinfo,
3268 stmt_vec_info stmt_vinfo, tree *type_out)
3270 gimple *last_stmt = stmt_vinfo->stmt;
3271 tree oprnd0, oprnd1, vectype, itype, cond;
3272 gimple *pattern_stmt, *def_stmt;
3273 enum tree_code rhs_code;
3274 optab optab;
3275 tree q;
3276 int dummy_int, prec;
3278 if (!is_gimple_assign (last_stmt))
3279 return NULL;
3281 rhs_code = gimple_assign_rhs_code (last_stmt);
3282 switch (rhs_code)
3284 case TRUNC_DIV_EXPR:
3285 case EXACT_DIV_EXPR:
3286 case TRUNC_MOD_EXPR:
3287 break;
3288 default:
3289 return NULL;
3292 oprnd0 = gimple_assign_rhs1 (last_stmt);
3293 oprnd1 = gimple_assign_rhs2 (last_stmt);
3294 itype = TREE_TYPE (oprnd0);
3295 if (TREE_CODE (oprnd0) != SSA_NAME
3296 || TREE_CODE (oprnd1) != INTEGER_CST
3297 || TREE_CODE (itype) != INTEGER_TYPE
3298 || !type_has_mode_precision_p (itype))
3299 return NULL;
3301 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3302 vectype = get_vectype_for_scalar_type (vinfo, itype);
3303 if (vectype == NULL_TREE)
3304 return NULL;
3306 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3308 /* If the target can handle vectorized division or modulo natively,
3309 don't attempt to optimize this, since native division is likely
3310 to give smaller code. */
3311 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3312 if (optab != unknown_optab)
3314 machine_mode vec_mode = TYPE_MODE (vectype);
3315 int icode = (int) optab_handler (optab, vec_mode);
3316 if (icode != CODE_FOR_nothing)
3317 return NULL;
3321 prec = TYPE_PRECISION (itype);
3322 if (integer_pow2p (oprnd1))
3324 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3325 return NULL;
3327 /* Pattern detected. */
3328 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3330 *type_out = vectype;
3332 /* Check if the target supports this internal function. */
3333 internal_fn ifn = IFN_DIV_POW2;
3334 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3336 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3338 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3339 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3340 gimple_call_set_lhs (div_stmt, var_div);
3342 if (rhs_code == TRUNC_MOD_EXPR)
3344 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3345 def_stmt
3346 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3347 LSHIFT_EXPR, var_div, shift);
3348 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3349 pattern_stmt
3350 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3351 MINUS_EXPR, oprnd0,
3352 gimple_assign_lhs (def_stmt));
3354 else
3355 pattern_stmt = div_stmt;
3356 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3358 return pattern_stmt;
3361 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3362 build_int_cst (itype, 0));
3363 if (rhs_code == TRUNC_DIV_EXPR
3364 || rhs_code == EXACT_DIV_EXPR)
3366 tree var = vect_recog_temp_ssa_var (itype, NULL);
3367 tree shift;
3368 def_stmt
3369 = gimple_build_assign (var, COND_EXPR, cond,
3370 fold_build2 (MINUS_EXPR, itype, oprnd1,
3371 build_int_cst (itype, 1)),
3372 build_int_cst (itype, 0));
3373 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3374 var = vect_recog_temp_ssa_var (itype, NULL);
3375 def_stmt
3376 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3377 gimple_assign_lhs (def_stmt));
3378 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3380 shift = build_int_cst (itype, tree_log2 (oprnd1));
3381 pattern_stmt
3382 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3383 RSHIFT_EXPR, var, shift);
3385 else
3387 tree signmask;
3388 if (compare_tree_int (oprnd1, 2) == 0)
3390 signmask = vect_recog_temp_ssa_var (itype, NULL);
3391 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3392 build_int_cst (itype, 1),
3393 build_int_cst (itype, 0));
3394 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3396 else
3398 tree utype
3399 = build_nonstandard_integer_type (prec, 1);
3400 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3401 tree shift
3402 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3403 - tree_log2 (oprnd1));
3404 tree var = vect_recog_temp_ssa_var (utype, NULL);
3406 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3407 build_int_cst (utype, -1),
3408 build_int_cst (utype, 0));
3409 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3410 var = vect_recog_temp_ssa_var (utype, NULL);
3411 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3412 gimple_assign_lhs (def_stmt),
3413 shift);
3414 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3415 signmask = vect_recog_temp_ssa_var (itype, NULL);
3416 def_stmt
3417 = gimple_build_assign (signmask, NOP_EXPR, var);
3418 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3420 def_stmt
3421 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3422 PLUS_EXPR, oprnd0, signmask);
3423 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3424 def_stmt
3425 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3426 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3427 fold_build2 (MINUS_EXPR, itype, oprnd1,
3428 build_int_cst (itype, 1)));
3429 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3431 pattern_stmt
3432 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3433 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3434 signmask);
3437 return pattern_stmt;
3440 if (prec > HOST_BITS_PER_WIDE_INT
3441 || integer_zerop (oprnd1))
3442 return NULL;
3444 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3445 return NULL;
3447 if (TYPE_UNSIGNED (itype))
3449 unsigned HOST_WIDE_INT mh, ml;
3450 int pre_shift, post_shift;
3451 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3452 & GET_MODE_MASK (itype_mode));
3453 tree t1, t2, t3, t4;
3455 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3456 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3457 return NULL;
3459 /* Find a suitable multiplier and right shift count so that the
3460 division by D can be replaced by a highpart multiply and shift. */
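/* choose_multiplier is the same helper the RTL expanders use for this.
   If the suggested multiplier fits in PREC bits (possibly after first
   shifting an even divisor's trailing zero bits off the dividend), a
   single highpart multiply plus shifts is enough; otherwise we fall
   back to the longer t1..t4 sequence sketched in the comments below.  */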
3461 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3463 /* If the suggested multiplier is more than SIZE bits, we can do better
3464 for even divisors, using an initial right shift. */
3465 if (mh != 0 && (d & 1) == 0)
3467 pre_shift = ctz_or_zero (d);
3468 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3469 &ml, &post_shift, &dummy_int);
3470 gcc_assert (!mh);
3472 else
3473 pre_shift = 0;
3475 if (mh != 0)
3477 if (post_shift - 1 >= prec)
3478 return NULL;
3480 /* t1 = oprnd0 h* ml;
3481 t2 = oprnd0 - t1;
3482 t3 = t2 >> 1;
3483 t4 = t1 + t3;
3484 q = t4 >> (post_shift - 1); */
3485 t1 = vect_recog_temp_ssa_var (itype, NULL);
3486 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3487 build_int_cst (itype, ml));
3488 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3490 t2 = vect_recog_temp_ssa_var (itype, NULL);
3491 def_stmt
3492 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3493 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3495 t3 = vect_recog_temp_ssa_var (itype, NULL);
3496 def_stmt
3497 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3498 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3500 t4 = vect_recog_temp_ssa_var (itype, NULL);
3501 def_stmt
3502 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3504 if (post_shift != 1)
3506 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3508 q = vect_recog_temp_ssa_var (itype, NULL);
3509 pattern_stmt
3510 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3511 build_int_cst (itype, post_shift - 1));
3513 else
3515 q = t4;
3516 pattern_stmt = def_stmt;
3519 else
3521 if (pre_shift >= prec || post_shift >= prec)
3522 return NULL;
3524 /* t1 = oprnd0 >> pre_shift;
3525 t2 = t1 h* ml;
3526 q = t2 >> post_shift; */
3527 if (pre_shift)
3529 t1 = vect_recog_temp_ssa_var (itype, NULL);
3530 def_stmt
3531 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3532 build_int_cst (NULL, pre_shift));
3533 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3535 else
3536 t1 = oprnd0;
3538 t2 = vect_recog_temp_ssa_var (itype, NULL);
3539 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3540 build_int_cst (itype, ml));
3542 if (post_shift)
3544 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3546 q = vect_recog_temp_ssa_var (itype, NULL);
3547 def_stmt
3548 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3549 build_int_cst (itype, post_shift));
3551 else
3552 q = t2;
3554 pattern_stmt = def_stmt;
3557 else
3559 unsigned HOST_WIDE_INT ml;
3560 int post_shift;
3561 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3562 unsigned HOST_WIDE_INT abs_d;
3563 bool add = false;
3564 tree t1, t2, t3, t4;
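/* Signed division by an arbitrary constant, again via a highpart
   multiply.  Rough sketch of the reasoning: when the multiplier ML
   chosen by choose_multiplier does not fit in PREC - 1 bits it is
   encoded as a negative constant, and the highpart multiply then
   yields floor (oprnd0 * ML / 2^PREC) - oprnd0, which the ADD step
   below corrects by adding oprnd0 back.  The final subtraction of T4
   (oprnd0 >> (PREC - 1), or a constant 0 / -1 when the sign of oprnd0
   is known from its range) rounds the quotient towards zero.  */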
3566 /* Give up for -1. */
3567 if (d == -1)
3568 return NULL;
3570 /* Since d might be INT_MIN, we have to cast to
3571 unsigned HOST_WIDE_INT before negating to avoid
3572 undefined signed overflow. */
3573 abs_d = (d >= 0
3574 ? (unsigned HOST_WIDE_INT) d
3575 : - (unsigned HOST_WIDE_INT) d);
3577 /* n rem d = n rem -d */
3578 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3580 d = abs_d;
3581 oprnd1 = build_int_cst (itype, abs_d);
3583 if (HOST_BITS_PER_WIDE_INT >= prec
3584 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3585 /* This case is not handled correctly below. */
3586 return NULL;
3588 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3589 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3591 add = true;
3592 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3594 if (post_shift >= prec)
3595 return NULL;
3597 /* t1 = oprnd0 h* ml; */
3598 t1 = vect_recog_temp_ssa_var (itype, NULL);
3599 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3600 build_int_cst (itype, ml));
3602 if (add)
3604 /* t2 = t1 + oprnd0; */
3605 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3606 t2 = vect_recog_temp_ssa_var (itype, NULL);
3607 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3609 else
3610 t2 = t1;
3612 if (post_shift)
3614 /* t3 = t2 >> post_shift; */
3615 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3616 t3 = vect_recog_temp_ssa_var (itype, NULL);
3617 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3618 build_int_cst (itype, post_shift));
3620 else
3621 t3 = t2;
3623 int msb = 1;
3624 value_range r;
3625 get_range_query (cfun)->range_of_expr (r, oprnd0);
3626 if (r.kind () == VR_RANGE)
3628 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
3629 msb = 0;
3630 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
3631 msb = -1;
3634 if (msb == 0 && d >= 0)
3636 /* q = t3; */
3637 q = t3;
3638 pattern_stmt = def_stmt;
3640 else
3642 /* t4 = oprnd0 >> (prec - 1);
3643 or if we know from VRP that oprnd0 >= 0
3644 t4 = 0;
3645 or if we know from VRP that oprnd0 < 0
3646 t4 = -1; */
3647 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3648 t4 = vect_recog_temp_ssa_var (itype, NULL);
3649 if (msb != 1)
3650 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3651 build_int_cst (itype, msb));
3652 else
3653 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3654 build_int_cst (itype, prec - 1));
3655 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3657 /* q = t3 - t4; or q = t4 - t3; */
3658 q = vect_recog_temp_ssa_var (itype, NULL);
3659 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3660 d < 0 ? t3 : t4);
3664 if (rhs_code == TRUNC_MOD_EXPR)
3666 tree r, t1;
3668 /* We divided. Now finish by:
3669 t1 = q * oprnd1;
3670 r = oprnd0 - t1; */
3671 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3673 t1 = vect_recog_temp_ssa_var (itype, NULL);
3674 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3675 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3677 r = vect_recog_temp_ssa_var (itype, NULL);
3678 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3681 /* Pattern detected. */
3682 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3684 *type_out = vectype;
3685 return pattern_stmt;
3688 /* Function vect_recog_mixed_size_cond_pattern
3690 Try to find the following pattern:
3692 type x_t, y_t;
3693 TYPE a_T, b_T, c_T;
3694 loop:
3695 S1 a_T = x_t CMP y_t ? b_T : c_T;
3697 where type 'TYPE' is an integral type which has a different size
3698 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3699 than 'type', the constants need to fit into an integer type
3700 with the same width as 'type') or results of conversion from 'type'.
3702 Input:
3704 * STMT_VINFO: The stmt from which the pattern search begins.
3706 Output:
3708 * TYPE_OUT: The type of the output of this pattern.
3710 * Return value: A new stmt that will be used to replace the pattern.
3711 Additionally a def_stmt is added.
3713 a_it = x_t CMP y_t ? b_it : c_it;
3714 a_T = (TYPE) a_it; */
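/* As a concrete illustration, with short x, y and int a:
     a = x < y ? 1 : 2;
   becomes
     a_s = x < y ? 1 : 2;
     a = (int) a_s;
   so that the comparison and the selection are carried out in the
   narrower vector type and only the result is widened.  */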
3716 static gimple *
3717 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3718 stmt_vec_info stmt_vinfo, tree *type_out)
3720 gimple *last_stmt = stmt_vinfo->stmt;
3721 tree cond_expr, then_clause, else_clause;
3722 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3723 gimple *pattern_stmt, *def_stmt;
3724 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3725 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3726 bool promotion;
3727 tree comp_scalar_type;
3729 if (!is_gimple_assign (last_stmt)
3730 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3731 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3732 return NULL;
3734 cond_expr = gimple_assign_rhs1 (last_stmt);
3735 then_clause = gimple_assign_rhs2 (last_stmt);
3736 else_clause = gimple_assign_rhs3 (last_stmt);
3738 if (!COMPARISON_CLASS_P (cond_expr))
3739 return NULL;
3741 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3742 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3743 if (comp_vectype == NULL_TREE)
3744 return NULL;
3746 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3747 if (types_compatible_p (type, comp_scalar_type)
3748 || ((TREE_CODE (then_clause) != INTEGER_CST
3749 || TREE_CODE (else_clause) != INTEGER_CST)
3750 && !INTEGRAL_TYPE_P (comp_scalar_type))
3751 || !INTEGRAL_TYPE_P (type))
3752 return NULL;
3754 if ((TREE_CODE (then_clause) != INTEGER_CST
3755 && !type_conversion_p (vinfo, then_clause, false,
3756 &orig_type0, &def_stmt0, &promotion))
3757 || (TREE_CODE (else_clause) != INTEGER_CST
3758 && !type_conversion_p (vinfo, else_clause, false,
3759 &orig_type1, &def_stmt1, &promotion)))
3760 return NULL;
3762 if (orig_type0 && orig_type1
3763 && !types_compatible_p (orig_type0, orig_type1))
3764 return NULL;
3766 if (orig_type0)
3768 if (!types_compatible_p (orig_type0, comp_scalar_type))
3769 return NULL;
3770 then_clause = gimple_assign_rhs1 (def_stmt0);
3771 itype = orig_type0;
3774 if (orig_type1)
3776 if (!types_compatible_p (orig_type1, comp_scalar_type))
3777 return NULL;
3778 else_clause = gimple_assign_rhs1 (def_stmt1);
3779 itype = orig_type1;
3783 HOST_WIDE_INT cmp_mode_size
3784 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3786 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3787 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3788 return NULL;
3790 vectype = get_vectype_for_scalar_type (vinfo, type);
3791 if (vectype == NULL_TREE)
3792 return NULL;
3794 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3795 return NULL;
3797 if (itype == NULL_TREE)
3798 itype = build_nonstandard_integer_type (cmp_mode_size,
3799 TYPE_UNSIGNED (type));
3801 if (itype == NULL_TREE
3802 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3803 return NULL;
3805 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3806 if (vecitype == NULL_TREE)
3807 return NULL;
3809 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3810 return NULL;
3812 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3814 if ((TREE_CODE (then_clause) == INTEGER_CST
3815 && !int_fits_type_p (then_clause, itype))
3816 || (TREE_CODE (else_clause) == INTEGER_CST
3817 && !int_fits_type_p (else_clause, itype)))
3818 return NULL;
3821 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3822 COND_EXPR, unshare_expr (cond_expr),
3823 fold_convert (itype, then_clause),
3824 fold_convert (itype, else_clause));
3825 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3826 NOP_EXPR, gimple_assign_lhs (def_stmt));
3828 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3829 *type_out = vectype;
3831 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3833 return pattern_stmt;
3837 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3838 true if bool VAR can and should be optimized that way. Assume it shouldn't
3839 in case it's a result of a comparison which can be directly vectorized into
3840 a vector comparison. Fills in STMTS with all stmts visited during the
3841 walk. */
3843 static bool
3844 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3846 tree rhs1;
3847 enum tree_code rhs_code;
3849 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3850 if (!def_stmt_info)
3851 return false;
3853 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3854 if (!def_stmt)
3855 return false;
3857 if (stmts.contains (def_stmt))
3858 return true;
3860 rhs1 = gimple_assign_rhs1 (def_stmt);
3861 rhs_code = gimple_assign_rhs_code (def_stmt);
3862 switch (rhs_code)
3864 case SSA_NAME:
3865 if (! check_bool_pattern (rhs1, vinfo, stmts))
3866 return false;
3867 break;
3869 CASE_CONVERT:
3870 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3871 return false;
3872 if (! check_bool_pattern (rhs1, vinfo, stmts))
3873 return false;
3874 break;
3876 case BIT_NOT_EXPR:
3877 if (! check_bool_pattern (rhs1, vinfo, stmts))
3878 return false;
3879 break;
3881 case BIT_AND_EXPR:
3882 case BIT_IOR_EXPR:
3883 case BIT_XOR_EXPR:
3884 if (! check_bool_pattern (rhs1, vinfo, stmts)
3885 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3886 return false;
3887 break;
3889 default:
3890 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3892 tree vecitype, comp_vectype;
3894 /* If the comparison can throw, then is_gimple_condexpr will be
3895 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3896 if (stmt_could_throw_p (cfun, def_stmt))
3897 return false;
3899 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3900 if (comp_vectype == NULL_TREE)
3901 return false;
3903 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3904 TREE_TYPE (rhs1));
3905 if (mask_type
3906 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3907 return false;
3909 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3911 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3912 tree itype
3913 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3914 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3915 if (vecitype == NULL_TREE)
3916 return false;
3918 else
3919 vecitype = comp_vectype;
3920 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3921 return false;
3923 else
3924 return false;
3925 break;
3928 bool res = stmts.add (def_stmt);
3929 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3930 gcc_assert (!res);
3932 return true;
3936 /* Helper function of adjust_bool_pattern. Cast VAR (the result of a
3937 previous stmt, SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast
3938 to STMT_INFO's pattern def sequence, and return the cast's result. */
3940 static tree
3941 adjust_bool_pattern_cast (vec_info *vinfo,
3942 tree type, tree var, stmt_vec_info stmt_info)
3944 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3945 NOP_EXPR, var);
3946 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
3947 get_vectype_for_scalar_type (vinfo, type));
3948 return gimple_assign_lhs (cast_stmt);
3951 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3952 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3953 type, OUT_TYPE is the desired final integer type of the whole pattern.
3954 STMT_INFO is the info of the pattern root and is where pattern stmts should
3955 be associated with. DEFS is a map of pattern defs. */
3957 static void
3958 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
3959 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3961 gimple *stmt = SSA_NAME_DEF_STMT (var);
3962 enum tree_code rhs_code, def_rhs_code;
3963 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3964 location_t loc;
3965 gimple *pattern_stmt, *def_stmt;
3966 tree trueval = NULL_TREE;
3968 rhs1 = gimple_assign_rhs1 (stmt);
3969 rhs2 = gimple_assign_rhs2 (stmt);
3970 rhs_code = gimple_assign_rhs_code (stmt);
3971 loc = gimple_location (stmt);
3972 switch (rhs_code)
3974 case SSA_NAME:
3975 CASE_CONVERT:
3976 irhs1 = *defs.get (rhs1);
3977 itype = TREE_TYPE (irhs1);
3978 pattern_stmt
3979 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3980 SSA_NAME, irhs1);
3981 break;
3983 case BIT_NOT_EXPR:
3984 irhs1 = *defs.get (rhs1);
3985 itype = TREE_TYPE (irhs1);
3986 pattern_stmt
3987 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3988 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3989 break;
3991 case BIT_AND_EXPR:
3992 /* Try to optimize x = y & (a < b ? 1 : 0); into
3993 x = (a < b ? y : 0);
3995 E.g. for:
3996 bool a_b, b_b, c_b;
3997 TYPE d_T;
3999 S1 a_b = x1 CMP1 y1;
4000 S2 b_b = x2 CMP2 y2;
4001 S3 c_b = a_b & b_b;
4002 S4 d_T = (TYPE) c_b;
4004 we would normally emit:
4006 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4007 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4008 S3' c_T = a_T & b_T;
4009 S4' d_T = c_T;
4011 but we can save one stmt by using the
4012 result of one of the COND_EXPRs in the other COND_EXPR and leave
4013 BIT_AND_EXPR stmt out:
4015 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4016 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4017 S4' d_T = c_T;
4019 At least when VEC_COND_EXPR is implemented using masks,
4020 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
4021 computes the comparison mask and ANDs it, in one case with an
4022 all-ones vector, in the other case with a vector register.
4023 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
4024 often more expensive. */
4025 def_stmt = SSA_NAME_DEF_STMT (rhs2);
4026 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4027 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4029 irhs1 = *defs.get (rhs1);
4030 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4031 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4032 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4034 rhs_code = def_rhs_code;
4035 rhs1 = def_rhs1;
4036 rhs2 = gimple_assign_rhs2 (def_stmt);
4037 trueval = irhs1;
4038 goto do_compare;
4040 else
4041 irhs2 = *defs.get (rhs2);
4042 goto and_ior_xor;
4044 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4045 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4046 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4048 irhs2 = *defs.get (rhs2);
4049 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4050 if (TYPE_PRECISION (TREE_TYPE (irhs2))
4051 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4053 rhs_code = def_rhs_code;
4054 rhs1 = def_rhs1;
4055 rhs2 = gimple_assign_rhs2 (def_stmt);
4056 trueval = irhs2;
4057 goto do_compare;
4059 else
4060 irhs1 = *defs.get (rhs1);
4061 goto and_ior_xor;
4063 /* FALLTHRU */
4064 case BIT_IOR_EXPR:
4065 case BIT_XOR_EXPR:
4066 irhs1 = *defs.get (rhs1);
4067 irhs2 = *defs.get (rhs2);
4068 and_ior_xor:
4069 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4070 != TYPE_PRECISION (TREE_TYPE (irhs2)))
4072 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
4073 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
4074 int out_prec = TYPE_PRECISION (out_type);
4075 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
4076 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
4077 stmt_info);
4078 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
4079 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
4080 stmt_info);
4081 else
4083 irhs1 = adjust_bool_pattern_cast (vinfo,
4084 out_type, irhs1, stmt_info);
4085 irhs2 = adjust_bool_pattern_cast (vinfo,
4086 out_type, irhs2, stmt_info);
4089 itype = TREE_TYPE (irhs1);
4090 pattern_stmt
4091 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4092 rhs_code, irhs1, irhs2);
4093 break;
4095 default:
4096 do_compare:
4097 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
4098 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
4099 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
4100 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
4101 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
4103 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4104 itype
4105 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4107 else
4108 itype = TREE_TYPE (rhs1);
4109 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
4110 if (trueval == NULL_TREE)
4111 trueval = build_int_cst (itype, 1);
4112 else
4113 gcc_checking_assert (useless_type_conversion_p (itype,
4114 TREE_TYPE (trueval)));
4115 pattern_stmt
4116 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4117 COND_EXPR, cond_expr, trueval,
4118 build_int_cst (itype, 0));
4119 break;
4122 gimple_set_location (pattern_stmt, loc);
4123 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
4124 get_vectype_for_scalar_type (vinfo, itype));
4125 defs.put (var, gimple_assign_lhs (pattern_stmt));
4128 /* Comparison function for qsorting a vector of gimple stmts by UID. */
4130 static int
4131 sort_after_uid (const void *p1, const void *p2)
4133 const gimple *stmt1 = *(const gimple * const *)p1;
4134 const gimple *stmt2 = *(const gimple * const *)p2;
4135 return gimple_uid (stmt1) - gimple_uid (stmt2);
4138 /* Create pattern stmts for all stmts participating in the bool pattern
4139 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4140 OUT_TYPE. Return the def of the pattern root. */
4142 static tree
4143 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4144 tree out_type, stmt_vec_info stmt_info)
4146 /* Gather original stmts in the bool pattern in their order of appearance
4147 in the IL. */
4148 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4149 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4150 i != bool_stmt_set.end (); ++i)
4151 bool_stmts.quick_push (*i);
4152 bool_stmts.qsort (sort_after_uid);
4154 /* Now process them in that order, producing pattern stmts. */
4155 hash_map <tree, tree> defs;
4156 for (unsigned i = 0; i < bool_stmts.length (); ++i)
4157 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4158 out_type, stmt_info, defs);
4160 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
4161 gimple *pattern_stmt
4162 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4163 return gimple_assign_lhs (pattern_stmt);
4166 /* Return the proper type for converting bool VAR into
4167 an integer value or NULL_TREE if no such type exists.
4168 The type is chosen so that the converted value has the
4169 same number of elements as VAR's vector type. */
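/* E.g. a mask that is ultimately defined by comparing two vectors of
   16-bit elements has mask_precision 16, so the integer type chosen
   here is a 16-bit unsigned type, whose vector type has the same
   number of elements as the mask.  */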
4171 static tree
4172 integer_type_for_mask (tree var, vec_info *vinfo)
4174 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4175 return NULL_TREE;
4177 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4178 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4179 return NULL_TREE;
4181 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4184 /* Function vect_recog_bool_pattern
4186 Try to find pattern like following:
4188 bool a_b, b_b, c_b, d_b, e_b;
4189 TYPE f_T;
4190 loop:
4191 S1 a_b = x1 CMP1 y1;
4192 S2 b_b = x2 CMP2 y2;
4193 S3 c_b = a_b & b_b;
4194 S4 d_b = x3 CMP3 y3;
4195 S5 e_b = c_b | d_b;
4196 S6 f_T = (TYPE) e_b;
4198 where type 'TYPE' is an integral type. Or a similar pattern
4199 ending in
4201 S6 f_Y = e_b ? r_Y : s_Y;
4203 as results from if-conversion of a complex condition.
4205 Input:
4207 * STMT_VINFO: The stmt at the end from which the pattern
4208 search begins, i.e. cast of a bool to
4209 an integer type.
4211 Output:
4213 * TYPE_OUT: The type of the output of this pattern.
4215 * Return value: A new stmt that will be used to replace the pattern.
4217 Assuming the size of TYPE is the same as the size of all comparisons
4218 (otherwise some casts would be added where needed), for the above
4219 sequence we create the following related pattern stmts:
4220 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4221 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4222 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4223 S5' e_T = c_T | d_T;
4224 S6' f_T = e_T;
4226 Instead of the above S3' we could emit:
4227 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4228 S3' c_T = a_T | b_T;
4229 but the above is more efficient. */
4231 static gimple *
4232 vect_recog_bool_pattern (vec_info *vinfo,
4233 stmt_vec_info stmt_vinfo, tree *type_out)
4235 gimple *last_stmt = stmt_vinfo->stmt;
4236 enum tree_code rhs_code;
4237 tree var, lhs, rhs, vectype;
4238 gimple *pattern_stmt;
4240 if (!is_gimple_assign (last_stmt))
4241 return NULL;
4243 var = gimple_assign_rhs1 (last_stmt);
4244 lhs = gimple_assign_lhs (last_stmt);
4245 rhs_code = gimple_assign_rhs_code (last_stmt);
4247 if (rhs_code == VIEW_CONVERT_EXPR)
4248 var = TREE_OPERAND (var, 0);
4250 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4251 return NULL;
4253 hash_set<gimple *> bool_stmts;
4255 if (CONVERT_EXPR_CODE_P (rhs_code)
4256 || rhs_code == VIEW_CONVERT_EXPR)
4258 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4259 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4260 return NULL;
4261 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4262 if (vectype == NULL_TREE)
4263 return NULL;
4265 if (check_bool_pattern (var, vinfo, bool_stmts))
4267 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4268 TREE_TYPE (lhs), stmt_vinfo);
4269 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4270 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4271 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4272 else
4273 pattern_stmt
4274 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4276 else
4278 tree type = integer_type_for_mask (var, vinfo);
4279 tree cst0, cst1, tmp;
4281 if (!type)
4282 return NULL;
4284 /* We may use the cond directly with a narrowed type, avoiding
4285 multiple cond exprs followed by result packing, and instead
4286 perform a single cond with a packed mask. In case of
4287 widening it is better to do the cond first and then extract
4288 the results. */
4289 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4290 type = TREE_TYPE (lhs);
4292 cst0 = build_int_cst (type, 0);
4293 cst1 = build_int_cst (type, 1);
4294 tmp = vect_recog_temp_ssa_var (type, NULL);
4295 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4297 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4299 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4300 append_pattern_def_seq (vinfo, stmt_vinfo,
4301 pattern_stmt, new_vectype);
4303 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4304 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4308 *type_out = vectype;
4309 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4311 return pattern_stmt;
4313 else if (rhs_code == COND_EXPR
4314 && TREE_CODE (var) == SSA_NAME)
4316 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4317 if (vectype == NULL_TREE)
4318 return NULL;
4320 /* Build a scalar type for the boolean result that when
4321 vectorized matches the vector type of the result in
4322 size and number of elements. */
4323 unsigned prec
4324 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4325 TYPE_VECTOR_SUBPARTS (vectype));
4327 tree type
4328 = build_nonstandard_integer_type (prec,
4329 TYPE_UNSIGNED (TREE_TYPE (var)));
4330 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4331 return NULL;
4333 if (!check_bool_pattern (var, vinfo, bool_stmts))
4334 return NULL;
4336 rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4338 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4339 pattern_stmt
4340 = gimple_build_assign (lhs, COND_EXPR,
4341 build2 (NE_EXPR, boolean_type_node,
4342 rhs, build_int_cst (type, 0)),
4343 gimple_assign_rhs2 (last_stmt),
4344 gimple_assign_rhs3 (last_stmt));
4345 *type_out = vectype;
4346 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4348 return pattern_stmt;
4350 else if (rhs_code == SSA_NAME
4351 && STMT_VINFO_DATA_REF (stmt_vinfo))
4353 stmt_vec_info pattern_stmt_info;
4354 tree nunits_vectype;
4355 if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4356 &nunits_vectype)
4357 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4358 return NULL;
4360 if (check_bool_pattern (var, vinfo, bool_stmts))
4361 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4362 TREE_TYPE (vectype), stmt_vinfo);
4363 else
4365 tree type = integer_type_for_mask (var, vinfo);
4366 tree cst0, cst1, new_vectype;
4368 if (!type)
4369 return NULL;
4371 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4372 type = TREE_TYPE (vectype);
4374 cst0 = build_int_cst (type, 0);
4375 cst1 = build_int_cst (type, 1);
4376 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4378 rhs = vect_recog_temp_ssa_var (type, NULL);
4379 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4380 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4383 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4384 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4386 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4387 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4388 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4389 rhs = rhs2;
4391 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4392 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4393 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4394 *type_out = vectype;
4395 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4397 return pattern_stmt;
4399 else
4400 return NULL;
4404 /* A helper for vect_recog_mask_conversion_pattern. Build
4405 conversion of MASK to a type suitable for masking VECTYPE.
4406 Built statement gets required vectype and is appended to
4407 a pattern sequence of STMT_VINFO.
4409 Return converted mask. */
4411 static tree
4412 build_mask_conversion (vec_info *vinfo,
4413 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4415 gimple *stmt;
4416 tree masktype, tmp;
4418 masktype = truth_type_for (vectype);
4419 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4420 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4421 append_pattern_def_seq (vinfo, stmt_vinfo,
4422 stmt, masktype, TREE_TYPE (vectype));
4424 return tmp;
4428 /* Function vect_recog_mask_conversion_pattern
4430 Try to find statements which require boolean type
4431 conversion. Additional conversion statements are
4432 added to handle such cases. For example:
4434 bool m_1, m_2, m_3;
4435 int i_4, i_5;
4436 double d_6, d_7;
4437 char c_1, c_2, c_3;
4439 S1 m_1 = i_4 > i_5;
4440 S2 m_2 = d_6 < d_7;
4441 S3 m_3 = m_1 & m_2;
4442 S4 c_1 = m_3 ? c_2 : c_3;
4444 Will be transformed into:
4446 S1 m_1 = i_4 > i_5;
4447 S2 m_2 = d_6 < d_7;
4448 S3'' m_2' = (_Bool[bitsize=32])m_2
4449 S3' m_3' = m_1 & m_2';
4450 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4451 S4' c_1' = m_3'' ? c_2 : c_3; */
4453 static gimple *
4454 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4455 stmt_vec_info stmt_vinfo, tree *type_out)
4457 gimple *last_stmt = stmt_vinfo->stmt;
4458 enum tree_code rhs_code;
4459 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4460 tree vectype1, vectype2;
4461 stmt_vec_info pattern_stmt_info;
4462 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4463 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4465 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4466 if (is_gimple_call (last_stmt)
4467 && gimple_call_internal_p (last_stmt))
4469 gcall *pattern_stmt;
4471 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4472 int mask_argno = internal_fn_mask_index (ifn);
4473 if (mask_argno < 0)
4474 return NULL;
4476 bool store_p = internal_store_fn_p (ifn);
4477 if (store_p)
4479 int rhs_index = internal_fn_stored_value_index (ifn);
4480 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4481 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4483 else
4485 lhs = gimple_call_lhs (last_stmt);
4486 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4489 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4490 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4491 if (!mask_arg_type)
4492 return NULL;
4493 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4495 if (!vectype1 || !vectype2
4496 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4497 TYPE_VECTOR_SUBPARTS (vectype2)))
4498 return NULL;
4500 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4502 auto_vec<tree, 8> args;
4503 unsigned int nargs = gimple_call_num_args (last_stmt);
4504 args.safe_grow (nargs, true);
4505 for (unsigned int i = 0; i < nargs; ++i)
4506 args[i] = ((int) i == mask_argno
4507 ? tmp
4508 : gimple_call_arg (last_stmt, i));
4509 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4511 if (!store_p)
4513 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4514 gimple_call_set_lhs (pattern_stmt, lhs);
4516 gimple_call_set_nothrow (pattern_stmt, true);
4518 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4519 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4520 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4522 *type_out = vectype1;
4523 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4525 return pattern_stmt;
4528 if (!is_gimple_assign (last_stmt))
4529 return NULL;
4531 gimple *pattern_stmt;
4532 lhs = gimple_assign_lhs (last_stmt);
4533 rhs1 = gimple_assign_rhs1 (last_stmt);
4534 rhs_code = gimple_assign_rhs_code (last_stmt);
4536 /* Check for cond expression requiring mask conversion. */
4537 if (rhs_code == COND_EXPR)
4539 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4541 if (TREE_CODE (rhs1) == SSA_NAME)
4543 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4544 if (!rhs1_type)
4545 return NULL;
4547 else if (COMPARISON_CLASS_P (rhs1))
4549 /* Check whether we're comparing scalar booleans and (if so)
4550 whether a better mask type exists than the mask associated
4551 with boolean-sized elements. This avoids unnecessary packs
4552 and unpacks if the booleans are set from comparisons of
4553 wider types. E.g. in:
4555 int x1, x2, x3, x4, y1, y2;
4557 bool b1 = (x1 == x2);
4558 bool b2 = (x3 == x4);
4559 ... = b1 == b2 ? y1 : y2;
4561 it is better for b1 and b2 to use the mask type associated
4562 with int elements rather than bool (byte) elements. */
4563 rhs1_op0 = TREE_OPERAND (rhs1, 0);
4564 rhs1_op1 = TREE_OPERAND (rhs1, 1);
4565 if (!rhs1_op0 || !rhs1_op1)
4566 return NULL;
4567 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4568 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4570 if (!rhs1_op0_type)
4571 rhs1_type = TREE_TYPE (rhs1_op0);
4572 else if (!rhs1_op1_type)
4573 rhs1_type = TREE_TYPE (rhs1_op1);
4574 else if (TYPE_PRECISION (rhs1_op0_type)
4575 != TYPE_PRECISION (rhs1_op1_type))
4577 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4578 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4579 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4580 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4581 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4583 if (abs (tmp0) > abs (tmp1))
4584 rhs1_type = rhs1_op1_type;
4585 else
4586 rhs1_type = rhs1_op0_type;
4588 else
4589 rhs1_type = build_nonstandard_integer_type
4590 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4592 else
4593 rhs1_type = rhs1_op0_type;
4595 else
4596 return NULL;
4598 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4600 if (!vectype1 || !vectype2)
4601 return NULL;
4603 /* Continue if a conversion is needed. Also continue if we have
4604 a comparison whose vector type would normally be different from
4605 VECTYPE2 when considered in isolation. In that case we'll
4606 replace the comparison with an SSA name (so that we can record
4607 its vector type) and behave as though the comparison was an SSA
4608 name from the outset. */
4609 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4610 TYPE_VECTOR_SUBPARTS (vectype2))
4611 && !rhs1_op0_type
4612 && !rhs1_op1_type)
4613 return NULL;
4615 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4616 in place, we can handle it in vectorizable_condition. This avoids
4617 unnecessary promotion stmts and increased vectorization factor. */
4618 if (COMPARISON_CLASS_P (rhs1)
4619 && INTEGRAL_TYPE_P (rhs1_type)
4620 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4621 TYPE_VECTOR_SUBPARTS (vectype2)))
4623 enum vect_def_type dt;
4624 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4625 && dt == vect_external_def
4626 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4627 && (dt == vect_external_def
4628 || dt == vect_constant_def))
4630 tree wide_scalar_type = build_nonstandard_integer_type
4631 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4632 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4633 wide_scalar_type);
4634 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4635 return NULL;
4639 /* If rhs1 is a comparison we need to move it into a
4640 separate statement. */
4641 if (TREE_CODE (rhs1) != SSA_NAME)
4643 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4644 if (rhs1_op0_type
4645 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4646 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4647 vectype2, stmt_vinfo);
4648 if (rhs1_op1_type
4649 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4650 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4651 vectype2, stmt_vinfo);
4652 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4653 rhs1_op0, rhs1_op1);
4654 rhs1 = tmp;
4655 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4656 rhs1_type);
4659 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4660 TYPE_VECTOR_SUBPARTS (vectype2)))
4661 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4662 else
4663 tmp = rhs1;
4665 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4666 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4667 gimple_assign_rhs2 (last_stmt),
4668 gimple_assign_rhs3 (last_stmt));
4670 *type_out = vectype1;
4671 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4673 return pattern_stmt;
4676 /* Now check for binary boolean operations requiring conversion for
4677 one of the operands. */
4678 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4679 return NULL;
4681 if (rhs_code != BIT_IOR_EXPR
4682 && rhs_code != BIT_XOR_EXPR
4683 && rhs_code != BIT_AND_EXPR
4684 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4685 return NULL;
4687 rhs2 = gimple_assign_rhs2 (last_stmt);
4689 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4690 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4692 if (!rhs1_type || !rhs2_type
4693 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4694 return NULL;
4696 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4698 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4699 if (!vectype1)
4700 return NULL;
4701 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4703 else
4705 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4706 if (!vectype1)
4707 return NULL;
4708 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4711 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4712 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4714 *type_out = vectype1;
4715 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4717 return pattern_stmt;
4720 /* STMT_INFO is a load or store. If the load or store is conditional, return
4721 the boolean condition under which it occurs, otherwise return null. */
4723 static tree
4724 vect_get_load_store_mask (stmt_vec_info stmt_info)
4726 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4728 gcc_assert (gimple_assign_single_p (def_assign));
4729 return NULL_TREE;
4732 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4734 internal_fn ifn = gimple_call_internal_fn (def_call);
4735 int mask_index = internal_fn_mask_index (ifn);
4736 return gimple_call_arg (def_call, mask_index);
4739 gcc_unreachable ();
4742 /* Return MASK if MASK is suitable for masking an operation on vectors
4743 of type VECTYPE, otherwise convert it into such a form and return
4744 the result. Associate any conversion statements with STMT_INFO's
4745 pattern. */
4747 static tree
4748 vect_convert_mask_for_vectype (tree mask, tree vectype,
4749 stmt_vec_info stmt_info, vec_info *vinfo)
4751 tree mask_type = integer_type_for_mask (mask, vinfo);
4752 if (mask_type)
4754 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4755 if (mask_vectype
4756 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4757 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4758 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4760 return mask;
4763 /* Return the equivalent of:
4765 fold_convert (TYPE, VALUE)
4767 with the expectation that the operation will be vectorized.
4768 If new statements are needed, add them as pattern statements
4769 to STMT_INFO. */
4771 static tree
4772 vect_add_conversion_to_pattern (vec_info *vinfo,
4773 tree type, tree value, stmt_vec_info stmt_info)
4775 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4776 return value;
4778 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4779 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4780 append_pattern_def_seq (vinfo, stmt_info, conversion,
4781 get_vectype_for_scalar_type (vinfo, type));
4782 return new_value;
4785 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4786 internal function. Return the final statement on success and set
4787 *TYPE_OUT to the vector type being loaded or stored.
4789 This function only handles gathers and scatters that were recognized
4790 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
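/* As a rough illustration, a gather load of the form x = ptr[idx]
   (with IDX varying per lane) is replaced by an internal-function
   call of the shape
     x = .GATHER_LOAD (base, offset, scale, 0);
   or, when a mask controls the access,
     x = .MASK_GATHER_LOAD (base, offset, scale, 0, mask);
   with the offset first converted to the width of the vector
   elements.  Scatter stores are handled analogously.  */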
4792 static gimple *
4793 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4794 stmt_vec_info stmt_info, tree *type_out)
4796 /* Currently we only support this for loop vectorization. */
4797 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4798 if (!loop_vinfo)
4799 return NULL;
4801 /* Make sure that we're looking at a gather load or scatter store. */
4802 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4803 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4804 return NULL;
4806 /* Get the boolean that controls whether the load or store happens.
4807 This is null if the operation is unconditional. */
4808 tree mask = vect_get_load_store_mask (stmt_info);
4810 /* Make sure that the target supports an appropriate internal
4811 function for the gather/scatter operation. */
4812 gather_scatter_info gs_info;
4813 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4814 || gs_info.ifn == IFN_LAST)
4815 return NULL;
4817 /* Convert the mask to the right form. */
4818 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4819 gs_info.element_type);
4820 if (mask)
4821 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4822 loop_vinfo);
4823 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
4824 || gs_info.ifn == IFN_MASK_GATHER_LOAD)
4825 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
4827 /* Get the invariant base and non-invariant offset, converting the
4828 latter to the same width as the vector elements. */
4829 tree base = gs_info.base;
4830 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4831 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4832 gs_info.offset, stmt_info);
4834 /* Build the new pattern statement. */
4835 tree scale = size_int (gs_info.scale);
4836 gcall *pattern_stmt;
4837 if (DR_IS_READ (dr))
4839 tree zero = build_zero_cst (gs_info.element_type);
4840 if (mask != NULL)
4841 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4842 offset, scale, zero, mask);
4843 else
4844 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4845 offset, scale, zero);
4846 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4847 gimple_call_set_lhs (pattern_stmt, load_lhs);
4849 else
4851 tree rhs = vect_get_store_rhs (stmt_info);
4852 if (mask != NULL)
4853 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
4854 base, offset, scale, rhs,
4855 mask);
4856 else
4857 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
4858 base, offset, scale, rhs);
4860 gimple_call_set_nothrow (pattern_stmt, true);
4862 /* Copy across relevant vectorization info and associate DR with the
4863 new pattern statement instead of the original statement. */
4864 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4865 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4867 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4868 *type_out = vectype;
4869 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4871 return pattern_stmt;
4874 /* Return true if TYPE is a non-boolean integer type. These are the types
4875 that we want to consider for narrowing. */
4877 static bool
4878 vect_narrowable_type_p (tree type)
4880 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4883 /* Return true if the operation given by CODE can be truncated to N bits
4884 when only N bits of the output are needed. This is only true if bit N+1
4885 of the inputs has no effect on the low N bits of the result. */
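/* For example, the low N bits of a PLUS_EXPR or MULT_EXPR result
   depend only on the low N bits of the operands, so those operations
   can be narrowed, whereas for TRUNC_DIV_EXPR or RSHIFT_EXPR the
   higher input bits do affect the low output bits.  */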
4887 static bool
4888 vect_truncatable_operation_p (tree_code code)
4890 switch (code)
4892 case PLUS_EXPR:
4893 case MINUS_EXPR:
4894 case MULT_EXPR:
4895 case BIT_AND_EXPR:
4896 case BIT_IOR_EXPR:
4897 case BIT_XOR_EXPR:
4898 case COND_EXPR:
4899 return true;
4901 default:
4902 return false;
4906 /* Record that STMT_INFO could be changed from operating on TYPE to
4907 operating on a type with the precision and sign given by PRECISION
4908 and SIGN respectively. PRECISION is an arbitrary bit precision;
4909 it might not be a whole number of bytes. */
4911 static void
4912 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4913 unsigned int precision, signop sign)
4915 /* Round the precision up to a whole number of bytes. */
4916 precision = vect_element_precision (precision);
4917 if (precision < TYPE_PRECISION (type)
4918 && (!stmt_info->operation_precision
4919 || stmt_info->operation_precision > precision))
4921 stmt_info->operation_precision = precision;
4922 stmt_info->operation_sign = sign;
4926 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4927 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4928 is an arbitrary bit precision; it might not be a whole number of bytes. */
4930 static void
4931 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4932 unsigned int min_input_precision)
4934 /* This operation in isolation only requires the inputs to have
4935 MIN_INPUT_PRECISION bits of precision. However, that doesn't mean
4936 that MIN_INPUT_PRECISION is a natural precision for the chain
4937 as a whole. E.g. consider something like:
4939 unsigned short *x, *y;
4940 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4942 The right shift can be done on unsigned chars, and only requires the
4943 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4944 approach would mean turning a natural chain of single-vector unsigned
4945 short operations into one that truncates "*x" and then extends
4946 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4947 operation and one vector for each unsigned char operation.
4948 This would be a significant pessimization.
4950 Instead only propagate the maximum of this precision and the precision
4951 required by the users of the result. This means that we don't pessimize
4952 the case above but continue to optimize things like:
4954 unsigned char *y;
4955 unsigned short *x;
4956 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4958 Here we would truncate two vectors of *x to a single vector of
4959 unsigned chars and use single-vector unsigned char operations for
4960 everything else, rather than doing two unsigned short copies of
4961 "(*x & 0xf0) >> 4" and then truncating the result. */
4962 min_input_precision = MAX (min_input_precision,
4963 stmt_info->min_output_precision);
4965 if (min_input_precision < TYPE_PRECISION (type)
4966 && (!stmt_info->min_input_precision
4967 || stmt_info->min_input_precision > min_input_precision))
4968 stmt_info->min_input_precision = min_input_precision;
4971 /* Subroutine of vect_determine_min_output_precision. Return true if
4972 we can calculate a reduced number of output bits for STMT_INFO,
4973 whose result is LHS. */
4975 static bool
4976 vect_determine_min_output_precision_1 (vec_info *vinfo,
4977 stmt_vec_info stmt_info, tree lhs)
4979 /* Take the maximum precision required by users of the result. */
4980 unsigned int precision = 0;
4981 imm_use_iterator iter;
4982 use_operand_p use;
4983 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4985 gimple *use_stmt = USE_STMT (use);
4986 if (is_gimple_debug (use_stmt))
4987 continue;
4988 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4989 if (!use_stmt_info || !use_stmt_info->min_input_precision)
4990 return false;
4991 /* The input precision recorded for COND_EXPRs applies only to the
4992 "then" and "else" values. */
4993 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4994 if (assign
4995 && gimple_assign_rhs_code (assign) == COND_EXPR
4996 && use->use != gimple_assign_rhs2_ptr (assign)
4997 && use->use != gimple_assign_rhs3_ptr (assign))
4998 return false;
4999 precision = MAX (precision, use_stmt_info->min_input_precision);
5002 if (dump_enabled_p ())
5003 dump_printf_loc (MSG_NOTE, vect_location,
5004 "only the low %d bits of %T are significant\n",
5005 precision, lhs);
5006 stmt_info->min_output_precision = precision;
5007 return true;
5010 /* Calculate min_output_precision for STMT_INFO. */
5012 static void
5013 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5015 /* We're only interested in statements with a narrowable result. */
5016 tree lhs = gimple_get_lhs (stmt_info->stmt);
5017 if (!lhs
5018 || TREE_CODE (lhs) != SSA_NAME
5019 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5020 return;
5022 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5023 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5026 /* Use range information to decide whether STMT (described by STMT_INFO)
5027 could be done in a narrower type. This is effectively a forward
5028 propagation, since it uses context-independent information that applies
5029 to all users of an SSA name. */
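/* For example, if range information shows that a 32-bit result always
   lies in [0, 200], the statement could be done in 8 unsigned bits
   (for operations that are not truncatable the operand ranges are
   checked as well, see below).  */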
5031 static void
5032 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
5034 tree lhs = gimple_assign_lhs (stmt);
5035 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
5036 return;
5038 tree type = TREE_TYPE (lhs);
5039 if (!vect_narrowable_type_p (type))
5040 return;
5042 /* First see whether we have any useful range information for the result. */
5043 unsigned int precision = TYPE_PRECISION (type);
5044 signop sign = TYPE_SIGN (type);
5045 wide_int min_value, max_value;
5046 if (!vect_get_range_info (lhs, &min_value, &max_value))
5047 return;
5049 tree_code code = gimple_assign_rhs_code (stmt);
5050 unsigned int nops = gimple_num_ops (stmt);
5052 if (!vect_truncatable_operation_p (code))
5053 /* Check that all relevant input operands are compatible, and update
5054 [MIN_VALUE, MAX_VALUE] to include their ranges. */
5055 for (unsigned int i = 1; i < nops; ++i)
5057 tree op = gimple_op (stmt, i);
5058 if (TREE_CODE (op) == INTEGER_CST)
5060 /* Don't require the integer to have RHS_TYPE (which it might
5061 not for things like shift amounts, etc.), but do require it
5062 to fit the type. */
5063 if (!int_fits_type_p (op, type))
5064 return;
5066 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
5067 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
5069 else if (TREE_CODE (op) == SSA_NAME)
5071 /* Ignore codes that don't take uniform arguments. */
5072 if (!types_compatible_p (TREE_TYPE (op), type))
5073 return;
5075 wide_int op_min_value, op_max_value;
5076 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
5077 return;
5079 min_value = wi::min (min_value, op_min_value, sign);
5080 max_value = wi::max (max_value, op_max_value, sign);
5082 else
5083 return;
5086 /* Try to switch signed types for unsigned types if we can.
5087 This is better for two reasons. First, unsigned ops tend
5088 to be cheaper than signed ops. Second, it means that we can
5089 handle things like:
5091 signed char c;
5092 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
5094 as:
5096 signed char c;
5097 unsigned short res_1 = (unsigned short) c & 0xff00;
5098 int res = (int) res_1;
5100 where the intermediate result res_1 has unsigned rather than
5101 signed type. */
5102 if (sign == SIGNED && !wi::neg_p (min_value))
5103 sign = UNSIGNED;
5105 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
5106 unsigned int precision1 = wi::min_precision (min_value, sign);
5107 unsigned int precision2 = wi::min_precision (max_value, sign);
5108 unsigned int value_precision = MAX (precision1, precision2);
5109 if (value_precision >= precision)
5110 return;
5112 if (dump_enabled_p ())
5113 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5114 " without loss of precision: %G",
5115 sign == SIGNED ? "signed" : "unsigned",
5116 value_precision, stmt);
5118 vect_set_operation_type (stmt_info, type, value_precision, sign);
5119 vect_set_min_input_precision (stmt_info, type, value_precision);
5122 /* Use information about the users of STMT's result to decide whether
5123 STMT (described by STMT_INFO) could be done in a narrower type.
5124 This is effectively a backward propagation. */
5126 static void
5127 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
5129 tree_code code = gimple_assign_rhs_code (stmt);
5130 unsigned int opno = (code == COND_EXPR ? 2 : 1);
5131 tree type = TREE_TYPE (gimple_op (stmt, opno));
5132 if (!vect_narrowable_type_p (type))
5133 return;
5135 unsigned int precision = TYPE_PRECISION (type);
5136 unsigned int operation_precision, min_input_precision;
5137 switch (code)
5139 CASE_CONVERT:
5140 /* Only the bits that contribute to the output matter. Don't change
5141 the precision of the operation itself. */
5142 operation_precision = precision;
5143 min_input_precision = stmt_info->min_output_precision;
5144 break;
5146 case LSHIFT_EXPR:
5147 case RSHIFT_EXPR:
5149 tree shift = gimple_assign_rhs2 (stmt);
5150 if (TREE_CODE (shift) != INTEGER_CST
5151 || !wi::ltu_p (wi::to_widest (shift), precision))
5152 return;
5153 unsigned int const_shift = TREE_INT_CST_LOW (shift);
5154 if (code == LSHIFT_EXPR)
5156 /* Avoid creating an undefined shift.
5158 ??? We could instead use min_output_precision as-is and
5159 optimize out-of-range shifts to zero. However, only
5160 degenerate testcases shift away all their useful input data,
5161 and it isn't natural to drop input operations in the middle
5162 of vectorization. This sort of thing should really be
5163 handled before vectorization. */
5164 operation_precision = MAX (stmt_info->min_output_precision,
5165 const_shift + 1);
5166 /* We need CONST_SHIFT fewer bits of the input. */
5167 min_input_precision = (MAX (operation_precision, const_shift)
5168 - const_shift);
5170 else
5172 /* We need CONST_SHIFT extra bits to do the operation. */
5173 operation_precision = (stmt_info->min_output_precision
5174 + const_shift);
5175 min_input_precision = operation_precision;
5177 break;
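/* Worked illustration of the shift cases above: if users only need
   the low 8 bits of the result, then for X << 4 an 8-bit shift of the
   low 4 bits of X is enough, whereas for X >> 4 we need a 12-bit
   shift of the low 12 bits of X to produce those 8 output bits.  */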
5180 default:
5181 if (vect_truncatable_operation_p (code))
5183 /* Input bit N has no effect on output bits N-1 and lower. */
5184 operation_precision = stmt_info->min_output_precision;
5185 min_input_precision = operation_precision;
5186 break;
5188 return;
5191 if (operation_precision < precision)
5193 if (dump_enabled_p ())
5194 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5195 " without affecting users: %G",
5196 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5197 operation_precision, stmt);
5198 vect_set_operation_type (stmt_info, type, operation_precision,
5199 TYPE_SIGN (type));
5201 vect_set_min_input_precision (stmt_info, type, min_input_precision);
5204 /* Return true if the statement described by STMT_INFO sets a boolean
5205 SSA_NAME and if we know how to vectorize this kind of statement using
5206 vector mask types. */
5208 static bool
5209 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5211 tree lhs = gimple_get_lhs (stmt_info->stmt);
5212 if (!lhs
5213 || TREE_CODE (lhs) != SSA_NAME
5214 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5215 return false;
5217 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5219 tree_code rhs_code = gimple_assign_rhs_code (assign);
5220 switch (rhs_code)
5222 CASE_CONVERT:
5223 case SSA_NAME:
5224 case BIT_NOT_EXPR:
5225 case BIT_IOR_EXPR:
5226 case BIT_XOR_EXPR:
5227 case BIT_AND_EXPR:
5228 return true;
5230 default:
5231 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5234 else if (is_a <gphi *> (stmt_info->stmt))
5235 return true;
5236 return false;
5239 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5240 a vector mask type instead of a normal vector type. Record the
5241 result in STMT_INFO->mask_precision. */
5243 static void
5244 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5246 if (!possible_vector_mask_operation_p (stmt_info))
5247 return;
5249 /* If at least one boolean input uses a vector mask type,
5250 pick the mask type with the narrowest elements.
5252 ??? This is the traditional behavior. It should always produce
5253 the smallest number of operations, but isn't necessarily the
5254 optimal choice. For example, if we have:
5256 a = b & c
5258 where:
5260 - the user of a wants it to have a mask type for 16-bit elements (M16)
5261 - b also uses M16
5262 - c uses a mask type for 8-bit elements (M8)
5264 then picking M8 gives:
5266 - 1 M16->M8 pack for b
5267 - 1 M8 AND for a
5268 - 2 M8->M16 unpacks for the user of a
5270 whereas picking M16 would have given:
5272 - 2 M8->M16 unpacks for c
5273 - 2 M16 ANDs for a
5275 The number of operations is the same, but M16 would have given
5276 a shorter dependency chain and allowed more ILP. */
5277 unsigned int precision = ~0U;
5278 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5280 unsigned int nops = gimple_num_ops (assign);
5281 for (unsigned int i = 1; i < nops; ++i)
5283 tree rhs = gimple_op (assign, i);
5284 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5285 continue;
5287 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5288 if (!def_stmt_info)
5289 /* Don't let external or constant operands influence the choice.
5290 We can convert them to whichever vector type we pick. */
5291 continue;
5293 if (def_stmt_info->mask_precision)
5295 if (precision > def_stmt_info->mask_precision)
5296 precision = def_stmt_info->mask_precision;
5300 /* If the statement compares two values that shouldn't use vector masks,
5301 try comparing the values as normal scalars instead. */
5302 tree_code rhs_code = gimple_assign_rhs_code (assign);
5303 if (precision == ~0U
5304 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5306 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5307 scalar_mode mode;
5308 tree vectype, mask_type;
5309 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5310 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5311 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5312 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5313 precision = GET_MODE_BITSIZE (mode);
5316 else
5318 gphi *phi = as_a <gphi *> (stmt_info->stmt);
5319 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5321 tree rhs = gimple_phi_arg_def (phi, i);
5323 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5324 if (!def_stmt_info)
5325 /* Don't let external or constant operands influence the choice.
5326 We can convert them to whichever vector type we pick. */
5327 continue;
5329 if (def_stmt_info->mask_precision)
5331 if (precision > def_stmt_info->mask_precision)
5332 precision = def_stmt_info->mask_precision;
5337 if (dump_enabled_p ())
5339 if (precision == ~0U)
5340 dump_printf_loc (MSG_NOTE, vect_location,
5341 "using normal nonmask vectors for %G",
5342 stmt_info->stmt);
5343 else
5344 dump_printf_loc (MSG_NOTE, vect_location,
5345 "using boolean precision %d for %G",
5346 precision, stmt_info->stmt);
5349 stmt_info->mask_precision = precision;
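/* Worked example (added for exposition): for mask_7 = a_2 < b_3 with 32-bit
   int operands and no boolean defs feeding the statement, the comparison
   fallback above asks whether the target can compare two vectors of 32-bit
   elements into a mask (expand_vec_cmp_expr_p); if so, mask_precision
   becomes GET_MODE_BITSIZE (SImode), i.e. 32.  */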
5352 /* Handle vect_determine_precisions for STMT_INFO, given that we
5353 have already done so for the users of its result. */
5355 void
5356 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5358 vect_determine_min_output_precision (vinfo, stmt_info);
5359 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5361 vect_determine_precisions_from_range (stmt_info, stmt);
5362 vect_determine_precisions_from_users (stmt_info, stmt);
5366 /* Walk backwards through the vectorizable region to determine the
5367 values of these fields:
5369 - min_output_precision
5370 - min_input_precision
5371 - operation_precision
5372 - operation_sign. */
5374 void
5375 vect_determine_precisions (vec_info *vinfo)
5377 DUMP_VECT_SCOPE ("vect_determine_precisions");
5379 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5381 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5382 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5383 unsigned int nbbs = loop->num_nodes;
5385 for (unsigned int i = 0; i < nbbs; i++)
5387 basic_block bb = bbs[i];
5388 for (auto gsi = gsi_start_phis (bb);
5389 !gsi_end_p (gsi); gsi_next (&gsi))
5391 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5392 if (stmt_info)
5393 vect_determine_mask_precision (vinfo, stmt_info);
5395 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5396 if (!is_gimple_debug (gsi_stmt (si)))
5397 vect_determine_mask_precision
5398 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5400 for (unsigned int i = 0; i < nbbs; i++)
5402 basic_block bb = bbs[nbbs - i - 1];
5403 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5404 !gsi_end_p (si); gsi_prev (&si))
5405 if (!is_gimple_debug (gsi_stmt (si)))
5406 vect_determine_stmt_precisions
5407 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5408 for (auto gsi = gsi_start_phis (bb);
5409 !gsi_end_p (gsi); gsi_next (&gsi))
5411 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5412 if (stmt_info)
5413 vect_determine_stmt_precisions (vinfo, stmt_info);
5417 else
5419 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5420 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5422 basic_block bb = bb_vinfo->bbs[i];
5423 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5425 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5426 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5427 vect_determine_mask_precision (vinfo, stmt_info);
5429 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5431 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5432 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5433 vect_determine_mask_precision (vinfo, stmt_info);
5436 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5438 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5439 !gsi_end_p (gsi); gsi_prev (&gsi))
5441 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5442 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5443 vect_determine_stmt_precisions (vinfo, stmt_info);
5445 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5446 !gsi_end_p (gsi); gsi_next (&gsi))
5448 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5449 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5450 vect_determine_stmt_precisions (vinfo, stmt_info);
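/* Worked example (added for exposition, assuming 8-bit unsigned char inputs
   promoted to 32-bit int as in C):

     _1 = (int) a_2;
     _3 = (int) b_4;
     _5 = _1 + _3;
     _6 = _5 >> 1;
     r_7 = (unsigned char) _6;

   Walking backwards, the final conversion needs only 8 bits of _6, so the
   shift gets min_output_precision = 8 and, being a right shift by 1,
   operation_precision = min_input_precision = 9.  The addition is a
   truncatable operation, so it inherits min_output_precision = 9 and can
   likewise be done in 9 bits, allowing the whole computation to be carried
   out on narrow vector elements.  */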
5456 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5458 struct vect_recog_func
5460 vect_recog_func_ptr fn;
5461 const char *name;
5464 /* Note that ordering matters - the first pattern matching on a stmt is
5465 taken, which means usually the more complex one needs to precede the
5466 less complex ones (widen_sum only after dot_prod or sad for example). */
5467 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5468 { vect_recog_over_widening_pattern, "over_widening" },
5469 /* Must come after over_widening, which narrows the shift as much as
5470 possible beforehand. */
5471 { vect_recog_average_pattern, "average" },
5472 { vect_recog_mulhs_pattern, "mult_high" },
5473 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5474 { vect_recog_widen_mult_pattern, "widen_mult" },
5475 { vect_recog_dot_prod_pattern, "dot_prod" },
5476 { vect_recog_sad_pattern, "sad" },
5477 { vect_recog_widen_sum_pattern, "widen_sum" },
5478 { vect_recog_pow_pattern, "pow" },
5479 { vect_recog_popcount_pattern, "popcount" },
5480 { vect_recog_widen_shift_pattern, "widen_shift" },
5481 { vect_recog_rotate_pattern, "rotate" },
5482 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5483 { vect_recog_divmod_pattern, "divmod" },
5484 { vect_recog_mult_pattern, "mult" },
5485 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5486 { vect_recog_bool_pattern, "bool" },
5487 /* This must come before mask conversion, and includes the parts
5488 of mask conversion that are needed for gather and scatter
5489 internal functions. */
5490 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5491 { vect_recog_mask_conversion_pattern, "mask_conversion" },
5492 { vect_recog_widen_plus_pattern, "widen_plus" },
5493 { vect_recog_widen_minus_pattern, "widen_minus" },
5496 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
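/* Illustrative scalar idioms for a few of the recognizers above (added for
   exposition; see each vect_recog_* function for the exact conditions):

     dot_prod:   sum += (int) a[i] * (int) b[i];      // narrow char/short inputs
     sad:        sum += abs ((int) a[i] - (int) b[i]);
     average:    r[i] = (a[i] + b[i] + 1) >> 1;       // rounding average
     widen_mult: w[i] = (long) a[i] * (long) b[i];

   Each is rewritten into a pattern statement that maps onto a wide-element
   vector operation when the target supports it.  */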
5498 /* Mark statements that are involved in a pattern. */
5500 void
5501 vect_mark_pattern_stmts (vec_info *vinfo,
5502 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5503 tree pattern_vectype)
5505 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5506 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5508 gimple *orig_pattern_stmt = NULL;
5509 if (is_pattern_stmt_p (orig_stmt_info))
5511 /* We're replacing a statement in an existing pattern definition
5512 sequence. */
5513 orig_pattern_stmt = orig_stmt_info->stmt;
5514 if (dump_enabled_p ())
5515 dump_printf_loc (MSG_NOTE, vect_location,
5516 "replacing earlier pattern %G", orig_pattern_stmt);
5518 /* To keep the book-keeping simple, just swap the lhs of the
5519 old and new statements, so that the old one has a valid but
5520 unused lhs. */
5521 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5522 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5523 gimple_set_lhs (pattern_stmt, old_lhs);
5525 if (dump_enabled_p ())
5526 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5528 /* Switch to the statement that ORIG replaces. */
5529 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5531 /* We shouldn't be replacing the main pattern statement. */
5532 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5533 != orig_pattern_stmt);
5536 if (def_seq)
5537 for (gimple_stmt_iterator si = gsi_start (def_seq);
5538 !gsi_end_p (si); gsi_next (&si))
5540 if (dump_enabled_p ())
5541 dump_printf_loc (MSG_NOTE, vect_location,
5542 "extra pattern stmt: %G", gsi_stmt (si));
5543 stmt_vec_info pattern_stmt_info
5544 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5545 orig_stmt_info, pattern_vectype);
5546 /* Stmts in the def sequence are not vectorizable cycle or
5547 induction defs; instead they should all be vect_internal_def,
5548 feeding the main pattern stmt, which retains this def type. */
5549 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5552 if (orig_pattern_stmt)
5554 vect_init_pattern_stmt (vinfo, pattern_stmt,
5555 orig_stmt_info, pattern_vectype);
5557 /* Insert all the new pattern statements before the original one. */
5558 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5559 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5560 orig_def_seq);
5561 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5562 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5564 /* Remove the pattern statement that this new pattern replaces. */
5565 gsi_remove (&gsi, false);
5567 else
5568 vect_set_pattern_stmt (vinfo,
5569 pattern_stmt, orig_stmt_info, pattern_vectype);
5571 /* Transfer reduction path info to the pattern. */
5572 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5574 tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
5575 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
5576 /* Search the pattern def sequence and the main pattern stmt. Note
5577 we may have inserted everything into a containing pattern def sequence,
5578 so the following is a bit awkward. */
5579 gimple_stmt_iterator si;
5580 gimple *s;
5581 if (def_seq)
5583 si = gsi_start (def_seq);
5584 s = gsi_stmt (si);
5585 gsi_next (&si);
5587 else
5589 si = gsi_none ();
5590 s = pattern_stmt;
5594 bool found = false;
5595 for (unsigned i = 1; i < gimple_num_ops (s); ++i)
5596 if (gimple_op (s, i) == lookfor)
5598 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
5599 lookfor = gimple_get_lhs (s);
5600 found = true;
5601 break;
5603 if (s == pattern_stmt)
5605 if (!found && dump_enabled_p ())
5606 dump_printf_loc (MSG_NOTE, vect_location,
5607 "failed to update reduction index.\n");
5608 break;
5610 if (gsi_end_p (si))
5611 s = pattern_stmt;
5612 else
5614 s = gsi_stmt (si);
5615 if (s == pattern_stmt)
5616 /* Found the end inside a bigger pattern def seq. */
5617 si = gsi_none ();
5618 else
5619 gsi_next (&si);
5621 } while (1);
5625 /* Function vect_pattern_recog_1
5627 Input:
5628 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5629 computation pattern.
5630 STMT_INFO: A stmt from which the pattern search should start.
5632 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5633 a sequence of statements that has the same functionality and can be
5634 used to replace STMT_INFO. It returns the last statement in the sequence
5635 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5636 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5637 statement, having first checked that the target supports the new operation
5638 in that type.
5640 This function also does some bookkeeping, as explained in the documentation
5641 for vect_pattern_recog. */
5643 static void
5644 vect_pattern_recog_1 (vec_info *vinfo,
5645 vect_recog_func *recog_func, stmt_vec_info stmt_info)
5647 gimple *pattern_stmt;
5648 loop_vec_info loop_vinfo;
5649 tree pattern_vectype;
5651 /* If this statement has already been replaced with pattern statements,
5652 leave the original statement alone, since the first match wins.
5653 Instead try to match against the definition statements that feed
5654 the main pattern statement. */
5655 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5657 gimple_stmt_iterator gsi;
5658 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5659 !gsi_end_p (gsi); gsi_next (&gsi))
5660 vect_pattern_recog_1 (vinfo, recog_func,
5661 vinfo->lookup_stmt (gsi_stmt (gsi)));
5662 return;
5665 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5666 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5667 if (!pattern_stmt)
5669 /* Clear any half-formed pattern definition sequence. */
5670 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5671 return;
5674 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5675 gcc_assert (pattern_vectype);
5677 /* Found a vectorizable pattern. */
5678 if (dump_enabled_p ())
5679 dump_printf_loc (MSG_NOTE, vect_location,
5680 "%s pattern recognized: %G",
5681 recog_func->name, pattern_stmt);
5683 /* Mark the stmts that are involved in the pattern. */
5684 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5686 /* Patterns cannot be vectorized using SLP, because they change the order of
5687 computation. */
5688 if (loop_vinfo)
5690 unsigned ix, ix2;
5691 stmt_vec_info *elem_ptr;
5692 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5693 elem_ptr, *elem_ptr == stmt_info);
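/* A minimal sketch (added for exposition) of the shape the recognizers
   invoked above follow; vect_recog_example_pattern is hypothetical and its
   body is elided:

     static gimple *
     vect_recog_example_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
				 tree *type_out)
     {
       gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
       if (!last_stmt)
	 return NULL;
       ...check the statement and its feeding definitions...
       ...build the replacement statement, record any helper statements in
	  the STMT_VINFO_PATTERN_DEF_SEQ, and set *type_out to the vector
	  type of the result...
       return pattern_stmt;
     }

   Returning NULL means "no match"; vect_pattern_recog_1 then clears any
   half-formed pattern definition sequence and tries the next recognizer.  */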
5698 /* Function vect_pattern_recog
5700 Input:
5701 VINFO - the vec_info of the loop or basic-block region in which we want
5702 to look for computation idioms.
5704 Output - for each computation idiom that is detected we create a new stmt
5705 that provides the same functionality and that can be vectorized. We
5706 also record some information in the stmt_vec_info of the relevant
5707 stmts, as explained below:
5709 At the entry to this function we have the following stmts, with the
5710 following initial value in the STMT_VINFO fields:
5712 stmt in_pattern_p related_stmt vec_stmt
5713 S1: a_i = .... - - -
5714 S2: a_2 = ..use(a_i).. - - -
5715 S3: a_1 = ..use(a_2).. - - -
5716 S4: a_0 = ..use(a_1).. - - -
5717 S5: ... = ..use(a_0).. - - -
5719 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5720 represented by a single stmt. We then:
5721 - create a new stmt S6 equivalent to the pattern (the stmt is not
5722 inserted into the code)
5723 - fill in the STMT_VINFO fields as follows:
5725 in_pattern_p related_stmt vec_stmt
5726 S1: a_i = .... - - -
5727 S2: a_2 = ..use(a_i).. - - -
5728 S3: a_1 = ..use(a_2).. - - -
5729 S4: a_0 = ..use(a_1).. true S6 -
5730 '---> S6: a_new = .... - S4 -
5731 S5: ... = ..use(a_0).. - - -
5733 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5734 to each other through the RELATED_STMT field).
5736 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5737 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5738 remain irrelevant unless used by stmts other than S4.
5740 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5741 (because they are marked as irrelevant). It will vectorize S6, and record
5742 a pointer to the new vector stmt VS6 from S6 (as usual).
5743 S4 will be skipped, and S5 will be vectorized as usual:
5745 in_pattern_p related_stmt vec_stmt
5746 S1: a_i = .... - - -
5747 S2: a_2 = ..use(a_i).. - - -
5748 S3: a_1 = ..use(a_2).. - - -
5749 > VS6: va_new = .... - - -
5750 S4: a_0 = ..use(a_1).. true S6 VS6
5751 '---> S6: a_new = .... - S4 VS6
5752 > VS5: ... = ..vuse(va_new).. - - -
5753 S5: ... = ..use(a_0).. - - -
5755 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5756 elsewhere), and we'll end up with:
5758 VS6: va_new = ....
5759 VS5: ... = ..vuse(va_new)..
5761 In case of more than one pattern statements, e.g., widen-mult with
5762 intermediate type:
5764 S1 a_t = ;
5765 S2 a_T = (TYPE) a_t;
5766 '--> S3: a_it = (interm_type) a_t;
5767 S4 prod_T = a_T * CONST;
5768 '--> S5: prod_T' = a_it w* CONST;
5770 there may be other users of a_T outside the pattern. In that case S2 will
5771 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5772 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5773 be recorded in S3. */
5775 void
5776 vect_pattern_recog (vec_info *vinfo)
5778 class loop *loop;
5779 basic_block *bbs;
5780 unsigned int nbbs;
5781 gimple_stmt_iterator si;
5782 unsigned int i, j;
5784 vect_determine_precisions (vinfo);
5786 DUMP_VECT_SCOPE ("vect_pattern_recog");
5788 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5790 loop = LOOP_VINFO_LOOP (loop_vinfo);
5791 bbs = LOOP_VINFO_BBS (loop_vinfo);
5792 nbbs = loop->num_nodes;
5794 /* Scan through the loop stmts, applying the pattern recognition
5795 functions starting at each stmt visited: */
5796 for (i = 0; i < nbbs; i++)
5798 basic_block bb = bbs[i];
5799 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5801 if (is_gimple_debug (gsi_stmt (si)))
5802 continue;
5803 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5804 /* Scan over all generic vect_recog_xxx_pattern functions. */
5805 for (j = 0; j < NUM_PATTERNS; j++)
5806 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5807 stmt_info);
5811 else
5813 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5814 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5815 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
5816 !gsi_end_p (gsi); gsi_next (&gsi))
5818 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
5819 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5820 continue;
5822 /* Scan over all generic vect_recog_xxx_pattern functions. */
5823 for (j = 0; j < NUM_PATTERNS; j++)
5824 vect_pattern_recog_1 (vinfo,
5825 &vect_vect_recog_func_ptrs[j], stmt_info);
5829 /* After this no more add_stmt calls are allowed. */
5830 vinfo->stmt_vec_info_ro = true;