gcc/tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51 #include "gimple-range.h"
53 /* Return true if we have a useful VR_RANGE range for VAR, storing it
54 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
56 static bool
57 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
59 value_range vr;
60 get_range_query (cfun)->range_of_expr (vr, var);
61 if (vr.undefined_p ())
62 vr.set_varying (TREE_TYPE (var));
63 *min_value = wi::to_wide (vr.min ());
64 *max_value = wi::to_wide (vr.max ());
65 value_range_kind vr_type = vr.kind ();
66 wide_int nonzero = get_nonzero_bits (var);
67 signop sgn = TYPE_SIGN (TREE_TYPE (var));
68 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
69 nonzero, sgn) == VR_RANGE)
71 if (dump_enabled_p ())
73 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
74 dump_printf (MSG_NOTE, " has range [");
75 dump_hex (MSG_NOTE, *min_value);
76 dump_printf (MSG_NOTE, ", ");
77 dump_hex (MSG_NOTE, *max_value);
78 dump_printf (MSG_NOTE, "]\n");
80 return true;
82 else
84 if (dump_enabled_p ())
86 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
87 dump_printf (MSG_NOTE, " has no range info\n");
89 return false;
    93 /* Report that we've found an instance of the pattern NAME in
    94    statement STMT.  */
96 static void
97 vect_pattern_detected (const char *name, gimple *stmt)
99 if (dump_enabled_p ())
100 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
103 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
104 return the pattern statement's stmt_vec_info. Set its vector type to
105 VECTYPE if it doesn't have one already. */
107 static stmt_vec_info
108 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
109 stmt_vec_info orig_stmt_info, tree vectype)
111 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
112 if (pattern_stmt_info == NULL)
113 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
114 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
116 pattern_stmt_info->pattern_stmt_p = true;
117 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
118 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
119 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
120 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
122 gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
123 == vect_use_mask_type_p (orig_stmt_info));
124 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
125 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
127 return pattern_stmt_info;
130 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
131 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
132 have one already. */
134 static void
135 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
136 stmt_vec_info orig_stmt_info, tree vectype)
138 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
139 STMT_VINFO_RELATED_STMT (orig_stmt_info)
140 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
143 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
144 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
145 be different from the vector type of the final pattern statement.
146 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
147 from which it was derived. */
149 static inline void
150 append_pattern_def_seq (vec_info *vinfo,
151 stmt_vec_info stmt_info, gimple *new_stmt,
152 tree vectype = NULL_TREE,
153 tree scalar_type_for_mask = NULL_TREE)
155 gcc_assert (!scalar_type_for_mask
156 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
157 if (vectype)
159 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
160 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
161 if (scalar_type_for_mask)
162 new_stmt_info->mask_precision
163 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
165 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
166 new_stmt);
169 /* The caller wants to perform new operations on vect_external variable
170 VAR, so that the result of the operations would also be vect_external.
171 Return the edge on which the operations can be performed, if one exists.
172 Return null if the operations should instead be treated as part of
173 the pattern that needs them. */
175 static edge
176 vect_get_external_def_edge (vec_info *vinfo, tree var)
178 edge e = NULL;
179 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
181 e = loop_preheader_edge (loop_vinfo->loop);
182 if (!SSA_NAME_IS_DEFAULT_DEF (var))
184 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
185 if (bb == NULL
186 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
187 e = NULL;
190 return e;
193 /* Return true if the target supports a vector version of CODE,
194 where CODE is known to map to a direct optab with the given SUBTYPE.
195 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
196 specifies the type of the scalar result.
   198    If CODE allows the inputs and outputs to have different types
199 (such as for WIDEN_SUM_EXPR), it is the input mode rather
200 than the output mode that determines the appropriate target pattern.
201 Operand 0 of the target pattern then specifies the mode that the output
202 must have.
204 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
205 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
206 is nonnull. */
208 static bool
209 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
210 tree itype, tree *vecotype_out,
211 tree *vecitype_out = NULL,
212 enum optab_subtype subtype = optab_default)
214 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
215 if (!vecitype)
216 return false;
218 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
219 if (!vecotype)
220 return false;
222 optab optab = optab_for_tree_code (code, vecitype, subtype);
223 if (!optab)
224 return false;
226 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
227 if (icode == CODE_FOR_nothing
228 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
229 return false;
231 *vecotype_out = vecotype;
232 if (vecitype_out)
233 *vecitype_out = vecitype;
234 return true;
237 /* Round bit precision PRECISION up to a full element. */
239 static unsigned int
240 vect_element_precision (unsigned int precision)
242 precision = 1 << ceil_log2 (precision);
243 return MAX (precision, BITS_PER_UNIT);
246 /* If OP is defined by a statement that's being considered for vectorization,
247 return information about that statement, otherwise return NULL. */
249 static stmt_vec_info
250 vect_get_internal_def (vec_info *vinfo, tree op)
252 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
253 if (def_stmt_info
254 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
255 return def_stmt_info;
256 return NULL;
259 /* Check whether NAME, an ssa-name used in STMT_VINFO,
260 is a result of a type promotion, such that:
261 DEF_STMT: NAME = NOP (name0)
262 If CHECK_SIGN is TRUE, check that either both types are signed or both are
263 unsigned. */
265 static bool
266 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
267 tree *orig_type, gimple **def_stmt, bool *promotion)
269 tree type = TREE_TYPE (name);
270 tree oprnd0;
271 enum vect_def_type dt;
273 stmt_vec_info def_stmt_info;
274 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
275 return false;
277 if (dt != vect_internal_def
278 && dt != vect_external_def && dt != vect_constant_def)
279 return false;
281 if (!*def_stmt)
282 return false;
284 if (!is_gimple_assign (*def_stmt))
285 return false;
287 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
288 return false;
290 oprnd0 = gimple_assign_rhs1 (*def_stmt);
292 *orig_type = TREE_TYPE (oprnd0);
293 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
294 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
295 return false;
297 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
298 *promotion = true;
299 else
300 *promotion = false;
302 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
303 return false;
305 return true;
308 /* Holds information about an input operand after some sign changes
309 and type promotions have been peeled away. */
310 class vect_unpromoted_value {
311 public:
312 vect_unpromoted_value ();
314 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
316 /* The value obtained after peeling away zero or more casts. */
317 tree op;
319 /* The type of OP. */
320 tree type;
322 /* The definition type of OP. */
323 vect_def_type dt;
325 /* If OP is the result of peeling at least one cast, and if the cast
326 of OP itself is a vectorizable statement, CASTER identifies that
327 statement, otherwise it is null. */
328 stmt_vec_info caster;
331 inline vect_unpromoted_value::vect_unpromoted_value ()
332 : op (NULL_TREE),
333 type (NULL_TREE),
334 dt (vect_uninitialized_def),
335 caster (NULL)
339 /* Set the operand to OP_IN, its definition type to DT_IN, and the
340 statement that casts it to CASTER_IN. */
342 inline void
343 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
344 stmt_vec_info caster_in)
346 op = op_in;
347 type = TREE_TYPE (op);
348 dt = dt_in;
349 caster = caster_in;
352 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
353 to reach some vectorizable inner operand OP', continuing as long as it
354 is possible to convert OP' back to OP using a possible sign change
355 followed by a possible promotion P. Return this OP', or null if OP is
356 not a vectorizable SSA name. If there is a promotion P, describe its
357 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
358 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
359 have more than one user.
361 A successful return means that it is possible to go from OP' to OP
362 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
363 whereas the cast from UNPROM to OP might be a promotion, a sign
364 change, or a nop.
366 E.g. say we have:
368 signed short *ptr = ...;
369 signed short C = *ptr;
370 unsigned short B = (unsigned short) C; // sign change
371 signed int A = (signed int) B; // unsigned promotion
372 ...possible other uses of A...
373 unsigned int OP = (unsigned int) A; // sign change
375 In this case it's possible to go directly from C to OP using:
377 OP = (unsigned int) (unsigned short) C;
378 +------------+ +--------------+
379 promotion sign change
381 so OP' would be C. The input to the promotion is B, so UNPROM
382 would describe B. */
384 static tree
385 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
386 vect_unpromoted_value *unprom,
387 bool *single_use_p = NULL)
389 tree res = NULL_TREE;
390 tree op_type = TREE_TYPE (op);
391 unsigned int orig_precision = TYPE_PRECISION (op_type);
392 unsigned int min_precision = orig_precision;
393 stmt_vec_info caster = NULL;
394 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
396 /* See whether OP is simple enough to vectorize. */
397 stmt_vec_info def_stmt_info;
398 gimple *def_stmt;
399 vect_def_type dt;
400 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
401 break;
403 /* If OP is the input of a demotion, skip over it to see whether
404 OP is itself the result of a promotion. If so, the combined
405 effect of the promotion and the demotion might fit the required
406 pattern, otherwise neither operation fits.
408 This copes with cases such as the result of an arithmetic
409 operation being truncated before being stored, and where that
410 arithmetic operation has been recognized as an over-widened one. */
411 if (TYPE_PRECISION (op_type) <= min_precision)
413 /* Use OP as the UNPROM described above if we haven't yet
414 found a promotion, or if using the new input preserves the
415 sign of the previous promotion. */
416 if (!res
417 || TYPE_PRECISION (unprom->type) == orig_precision
418 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
420 unprom->set_op (op, dt, caster);
421 min_precision = TYPE_PRECISION (op_type);
423 /* Stop if we've already seen a promotion and if this
424 conversion does more than change the sign. */
425 else if (TYPE_PRECISION (op_type)
426 != TYPE_PRECISION (unprom->type))
427 break;
429 /* The sequence now extends to OP. */
430 res = op;
433 /* See whether OP is defined by a cast. Record it as CASTER if
434 the cast is potentially vectorizable. */
435 if (!def_stmt)
436 break;
437 caster = def_stmt_info;
439 /* Ignore pattern statements, since we don't link uses for them. */
440 if (caster
441 && single_use_p
442 && !STMT_VINFO_RELATED_STMT (caster)
443 && !has_single_use (res))
444 *single_use_p = false;
446 gassign *assign = dyn_cast <gassign *> (def_stmt);
447 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
448 break;
450 /* Continue with the input to the cast. */
451 op = gimple_assign_rhs1 (def_stmt);
452 op_type = TREE_TYPE (op);
454 return res;
457 /* OP is an integer operand to an operation that returns TYPE, and we
458 want to treat the operation as a widening one. So far we can treat
459 it as widening from *COMMON_TYPE.
461 Return true if OP is suitable for such a widening operation,
462 either widening from *COMMON_TYPE or from some supertype of it.
463 Update *COMMON_TYPE to the supertype in the latter case.
465 SHIFT_P is true if OP is a shift amount. */
467 static bool
468 vect_joust_widened_integer (tree type, bool shift_p, tree op,
469 tree *common_type)
471 /* Calculate the minimum precision required by OP, without changing
472 the sign of either operand. */
473 unsigned int precision;
474 if (shift_p)
476 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
477 return false;
478 precision = TREE_INT_CST_LOW (op);
480 else
482 precision = wi::min_precision (wi::to_widest (op),
483 TYPE_SIGN (*common_type));
484 if (precision * 2 > TYPE_PRECISION (type))
485 return false;
488 /* If OP requires a wider type, switch to that type. The checks
489 above ensure that this is still narrower than the result. */
490 precision = vect_element_precision (precision);
491 if (TYPE_PRECISION (*common_type) < precision)
492 *common_type = build_nonstandard_integer_type
493 (precision, TYPE_UNSIGNED (*common_type));
494 return true;
497 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
498 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
500 static bool
501 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
503 if (types_compatible_p (*common_type, new_type))
504 return true;
506 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
507 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
508 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
509 return true;
511 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
512 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
513 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
515 *common_type = new_type;
516 return true;
519 /* We have mismatched signs, with the signed type being
520 no wider than the unsigned type. In this case we need
521 a wider signed type. */
522 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
523 TYPE_PRECISION (new_type));
524 precision *= 2;
526 if (precision * 2 > TYPE_PRECISION (type))
527 return false;
529 *common_type = build_nonstandard_integer_type (precision, false);
530 return true;
533 /* Check whether STMT_INFO can be viewed as a tree of integer operations
534 in which each node either performs CODE or WIDENED_CODE, and where
535 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
536 specifies the maximum number of leaf operands. SHIFT_P says whether
537 CODE and WIDENED_CODE are some sort of shift.
539 If STMT_INFO is such a tree, return the number of leaf operands
540 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
541 to a type that (a) is narrower than the result of STMT_INFO and
542 (b) can hold all leaf operand values.
   544    If SUBTYPE is nonnull, allow the signs of the operands to differ
   545    as long as their precisions match; *SUBTYPE is updated to reflect
   546    whether mixed signs were seen.
548 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
549 exists. */
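/* For illustration (hypothetical code, not from this file; the variable
   names are made up): assuming 16-bit "short" and 32-bit "int", when this
   is called for the multiplication statement of a dot-product candidate
   such as

     short x, y;
     int prod = (int) x * (int) y;

   with CODE = MULT_EXPR and WIDENED_CODE = WIDEN_MULT_EXPR, the statement
   forms such a tree with two leaf operands (x and y), each narrower than
   the 32-bit result, and *COMMON_TYPE is reported as "short".  */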
551 static unsigned int
552 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
553 tree_code widened_code, bool shift_p,
554 unsigned int max_nops,
555 vect_unpromoted_value *unprom, tree *common_type,
556 enum optab_subtype *subtype = NULL)
558 /* Check for an integer operation with the right code. */
559 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
560 if (!assign)
561 return 0;
563 tree_code rhs_code = gimple_assign_rhs_code (assign);
564 if (rhs_code != code && rhs_code != widened_code)
565 return 0;
567 tree type = TREE_TYPE (gimple_assign_lhs (assign));
568 if (!INTEGRAL_TYPE_P (type))
569 return 0;
571 /* Assume that both operands will be leaf operands. */
572 max_nops -= 2;
574 /* Check the operands. */
575 unsigned int next_op = 0;
576 for (unsigned int i = 0; i < 2; ++i)
578 vect_unpromoted_value *this_unprom = &unprom[next_op];
579 unsigned int nops = 1;
580 tree op = gimple_op (assign, i + 1);
581 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
583 /* We already have a common type from earlier operands.
584 Update it to account for OP. */
585 this_unprom->set_op (op, vect_constant_def);
586 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
587 return 0;
589 else
591 /* Only allow shifts by constants. */
592 if (shift_p && i == 1)
593 return 0;
595 if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
596 return 0;
598 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
600 /* The operand isn't widened. If STMT_INFO has the code
601 for an unwidened operation, recursively check whether
602 this operand is a node of the tree. */
603 if (rhs_code != code
604 || max_nops == 0
605 || this_unprom->dt != vect_internal_def)
606 return 0;
608 /* Give back the leaf slot allocated above now that we're
609 not treating this as a leaf operand. */
610 max_nops += 1;
612 /* Recursively process the definition of the operand. */
613 stmt_vec_info def_stmt_info
614 = vinfo->lookup_def (this_unprom->op);
615 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
616 widened_code, shift_p, max_nops,
617 this_unprom, common_type,
618 subtype);
619 if (nops == 0)
620 return 0;
622 max_nops -= nops;
624 else
626 /* Make sure that the operand is narrower than the result. */
627 if (TYPE_PRECISION (this_unprom->type) * 2
628 > TYPE_PRECISION (type))
629 return 0;
631 /* Update COMMON_TYPE for the new operand. */
632 if (i == 0)
633 *common_type = this_unprom->type;
634 else if (!vect_joust_widened_type (type, this_unprom->type,
635 common_type))
637 if (subtype)
639 /* See if we can sign extend the smaller type. */
640 if (TYPE_PRECISION (this_unprom->type)
641 > TYPE_PRECISION (*common_type))
642 *common_type = this_unprom->type;
643 *subtype = optab_vector_mixed_sign;
645 else
646 return 0;
650 next_op += nops;
652 return next_op;
655 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
656 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
658 static tree
659 vect_recog_temp_ssa_var (tree type, gimple *stmt)
661 return make_temp_ssa_name (type, stmt, "patt");
664 /* STMT2_INFO describes a type conversion that could be split into STMT1
665 followed by a version of STMT2_INFO that takes NEW_RHS as its first
666 input. Try to do this using pattern statements, returning true on
667 success. */
669 static bool
670 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
671 gimple *stmt1, tree vectype)
673 if (is_pattern_stmt_p (stmt2_info))
675 /* STMT2_INFO is part of a pattern. Get the statement to which
676 the pattern is attached. */
677 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
678 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location,
682 "Splitting pattern statement: %G", stmt2_info->stmt);
684 /* Since STMT2_INFO is a pattern statement, we can change it
685 in-situ without worrying about changing the code for the
686 containing block. */
687 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
689 if (dump_enabled_p ())
691 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
692 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
693 stmt2_info->stmt);
696 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
697 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
698 /* STMT2_INFO is the actual pattern statement. Add STMT1
699 to the end of the definition sequence. */
700 gimple_seq_add_stmt_without_update (def_seq, stmt1);
701 else
703 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
704 before it. */
705 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
706 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
708 return true;
710 else
712 /* STMT2_INFO doesn't yet have a pattern. Try to create a
713 two-statement pattern now. */
714 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
715 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
716 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
717 if (!lhs_vectype)
718 return false;
720 if (dump_enabled_p ())
721 dump_printf_loc (MSG_NOTE, vect_location,
722 "Splitting statement: %G", stmt2_info->stmt);
724 /* Add STMT1 as a singleton pattern definition sequence. */
725 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
726 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
727 gimple_seq_add_stmt_without_update (def_seq, stmt1);
729 /* Build the second of the two pattern statements. */
730 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
731 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
732 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
734 if (dump_enabled_p ())
736 dump_printf_loc (MSG_NOTE, vect_location,
737 "into pattern statements: %G", stmt1);
738 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
741 return true;
745 /* Convert UNPROM to TYPE and return the result, adding new statements
746 to STMT_INFO's pattern definition statements if no better way is
747 available. VECTYPE is the vector form of TYPE.
749 If SUBTYPE then convert the type based on the subtype. */
751 static tree
752 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
753 vect_unpromoted_value *unprom, tree vectype,
754 enum optab_subtype subtype = optab_default)
757 /* Update the type if the signs differ. */
758 if (subtype == optab_vector_mixed_sign
759 && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
760 type = build_nonstandard_integer_type (TYPE_PRECISION (type),
761 TYPE_SIGN (unprom->type));
763 /* Check for a no-op conversion. */
764 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
765 return unprom->op;
767 /* Allow the caller to create constant vect_unpromoted_values. */
768 if (TREE_CODE (unprom->op) == INTEGER_CST)
769 return wide_int_to_tree (type, wi::to_widest (unprom->op));
771 tree input = unprom->op;
772 if (unprom->caster)
774 tree lhs = gimple_get_lhs (unprom->caster->stmt);
775 tree lhs_type = TREE_TYPE (lhs);
777 /* If the result of the existing cast is the right width, use it
778 instead of the source of the cast. */
779 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
780 input = lhs;
781 /* If the precision we want is between the source and result
782 precisions of the existing cast, try splitting the cast into
783 two and tapping into a mid-way point. */
784 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
785 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
787 /* In order to preserve the semantics of the original cast,
788 give the mid-way point the same signedness as the input value.
790 It would be possible to use a signed type here instead if
791 TYPE is signed and UNPROM->TYPE is unsigned, but that would
792 make the sign of the midtype sensitive to the order in
793 which we process the statements, since the signedness of
794 TYPE is the signedness required by just one of possibly
795 many users. Also, unsigned promotions are usually as cheap
796 as or cheaper than signed ones, so it's better to keep an
797 unsigned promotion. */
798 tree midtype = build_nonstandard_integer_type
799 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
800 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
801 if (vec_midtype)
803 input = vect_recog_temp_ssa_var (midtype, NULL);
804 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
805 unprom->op);
806 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
807 vec_midtype))
808 append_pattern_def_seq (vinfo, stmt_info,
809 new_stmt, vec_midtype);
813 /* See if we can reuse an existing result. */
814 if (types_compatible_p (type, TREE_TYPE (input)))
815 return input;
818 /* We need a new conversion statement. */
819 tree new_op = vect_recog_temp_ssa_var (type, NULL);
820 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
822 /* If OP is an external value, see if we can insert the new statement
823 on an incoming edge. */
824 if (input == unprom->op && unprom->dt == vect_external_def)
825 if (edge e = vect_get_external_def_edge (vinfo, input))
827 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
828 gcc_assert (!new_bb);
829 return new_op;
832 /* As a (common) last resort, add the statement to the pattern itself. */
833 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
834 return new_op;
837 /* Invoke vect_convert_input for N elements of UNPROM and store the
838 result in the corresponding elements of RESULT.
840 If SUBTYPE then convert the type based on the subtype. */
842 static void
843 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
844 tree *result, tree type, vect_unpromoted_value *unprom,
845 tree vectype, enum optab_subtype subtype = optab_default)
847 for (unsigned int i = 0; i < n; ++i)
849 unsigned int j;
850 for (j = 0; j < i; ++j)
851 if (unprom[j].op == unprom[i].op)
852 break;
854 if (j < i)
855 result[i] = result[j];
856 else
857 result[i] = vect_convert_input (vinfo, stmt_info,
858 type, &unprom[i], vectype, subtype);
862 /* The caller has created a (possibly empty) sequence of pattern definition
863 statements followed by a single statement PATTERN_STMT. Cast the result
864 of this final statement to TYPE. If a new statement is needed, add
865 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
866 and return the new statement, otherwise return PATTERN_STMT as-is.
867 VECITYPE is the vector form of PATTERN_STMT's result type. */
869 static gimple *
870 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
871 gimple *pattern_stmt, tree vecitype)
873 tree lhs = gimple_get_lhs (pattern_stmt);
874 if (!types_compatible_p (type, TREE_TYPE (lhs)))
876 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
877 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
878 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
880 return pattern_stmt;
883 /* Return true if STMT_VINFO describes a reduction for which reassociation
884 is allowed. If STMT_INFO is part of a group, assume that it's part of
885 a reduction chain and optimistically assume that all statements
886 except the last allow reassociation.
887 Also require it to have code CODE and to be a reduction
888 in the outermost loop. When returning true, store the operands in
889 *OP0_OUT and *OP1_OUT. */
891 static bool
892 vect_reassociating_reduction_p (vec_info *vinfo,
893 stmt_vec_info stmt_info, tree_code code,
894 tree *op0_out, tree *op1_out)
896 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
897 if (!loop_info)
898 return false;
900 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
901 if (!assign || gimple_assign_rhs_code (assign) != code)
902 return false;
904 /* We don't allow changing the order of the computation in the inner-loop
905 when doing outer-loop vectorization. */
906 class loop *loop = LOOP_VINFO_LOOP (loop_info);
907 if (loop && nested_in_vect_loop_p (loop, stmt_info))
908 return false;
910 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
912 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
913 code))
914 return false;
916 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
917 return false;
919 *op0_out = gimple_assign_rhs1 (assign);
920 *op1_out = gimple_assign_rhs2 (assign);
921 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
922 std::swap (*op0_out, *op1_out);
923 return true;
926 /* Function vect_recog_dot_prod_pattern
928 Try to find the following pattern:
   930      type1a x_t;
931 type1b y_t;
932 TYPE1 prod;
933 TYPE2 sum = init;
934 loop:
935 sum_0 = phi <init, sum_1>
936 S1 x_t = ...
937 S2 y_t = ...
938 S3 x_T = (TYPE1) x_t;
939 S4 y_T = (TYPE1) y_t;
940 S5 prod = x_T * y_T;
941 [S6 prod = (TYPE2) prod; #optional]
942 S7 sum_1 = prod + sum_0;
   944    where 'TYPE1' is exactly double the size of types 'type1a' and 'type1b'.
   945    The sign of 'TYPE1' must match the sign of either 'type1a' or 'type1b',
   946    but 'type1a' and 'type1b' may differ in sign from each other.
948 Input:
950 * STMT_VINFO: The stmt from which the pattern search begins. In the
951 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
952 will be detected.
954 Output:
956 * TYPE_OUT: The type of the output of this pattern.
958 * Return value: A new stmt that will be used to replace the sequence of
959 stmts that constitute the pattern. In this case it will be:
960 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
962 Note: The dot-prod idiom is a widening reduction pattern that is
963 vectorized without preserving all the intermediate results. It
964 produces only N/2 (widened) results (by summing up pairs of
965 intermediate results) rather than all N results. Therefore, we
966 cannot allow this pattern when we want to get all the results and in
967 the correct order (as is the case when this computation is in an
   968    inner-loop nested in an outer-loop that is being vectorized).  */
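/* As an illustrative example (hypothetical code, names made up), assuming
   16-bit "short" and 32-bit "int", a loop of this shape matches the
   pattern above:

     short x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += x[i] * y[i];	/* operands promoted to int by C */

   Here S3/S4 are the promotions of x[i] and y[i], S5 the multiplication
   and S7 the accumulation, and the whole sequence is replaced by a
   DOT_PROD_EXPR on the narrow "short" vectors.  */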
970 static gimple *
971 vect_recog_dot_prod_pattern (vec_info *vinfo,
972 stmt_vec_info stmt_vinfo, tree *type_out)
974 tree oprnd0, oprnd1;
975 gimple *last_stmt = stmt_vinfo->stmt;
976 tree type, half_type;
977 gimple *pattern_stmt;
978 tree var;
980 /* Look for the following pattern
981 DX = (TYPE1) X;
982 DY = (TYPE1) Y;
983 DPROD = DX * DY;
984 DDPROD = (TYPE2) DPROD;
985 sum_1 = DDPROD + sum_0;
986 In which
987 - DX is double the size of X
988 - DY is double the size of Y
989 - DX, DY, DPROD all have the same type but the sign
990 between X, Y and DPROD can differ.
   991      - sum is the same size as DPROD or bigger
992 - sum has been recognized as a reduction variable.
994 This is equivalent to:
995 DPROD = X w* Y; #widen mult
996 sum_1 = DPROD w+ sum_0; #widen summation
998 DPROD = X w* Y; #widen mult
999 sum_1 = DPROD + sum_0; #summation
1002 /* Starting from LAST_STMT, follow the defs of its uses in search
1003 of the above pattern. */
1005 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1006 &oprnd0, &oprnd1))
1007 return NULL;
1009 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1011 vect_unpromoted_value unprom_mult;
1012 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1014 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1015 we know that oprnd1 is the reduction variable (defined by a loop-header
1016 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1017 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1018 if (!oprnd0)
1019 return NULL;
1021 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1022 if (!mult_vinfo)
1023 return NULL;
  1025   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
1026 inside the loop (in case we are analyzing an outer-loop). */
1027 vect_unpromoted_value unprom0[2];
1028 enum optab_subtype subtype = optab_vector;
1029 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1030 false, 2, unprom0, &half_type, &subtype))
1031 return NULL;
1033 /* If there are two widening operations, make sure they agree on the sign
1034 of the extension. The result of an optab_vector_mixed_sign operation
1035 is signed; otherwise, the result has the same sign as the operands. */
1036 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1037 && (subtype == optab_vector_mixed_sign
1038 ? TYPE_UNSIGNED (unprom_mult.type)
1039 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1040 return NULL;
1042 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1044 tree half_vectype;
1045 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1046 type_out, &half_vectype, subtype))
1047 return NULL;
1049 /* Get the inputs in the appropriate types. */
1050 tree mult_oprnd[2];
1051 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1052 unprom0, half_vectype, subtype);
1054 var = vect_recog_temp_ssa_var (type, NULL);
1055 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1056 mult_oprnd[0], mult_oprnd[1], oprnd1);
1058 return pattern_stmt;
1062 /* Function vect_recog_sad_pattern
1064 Try to find the following Sum of Absolute Difference (SAD) pattern:
1066 type x_t, y_t;
1067 signed TYPE1 diff, abs_diff;
1068 TYPE2 sum = init;
1069 loop:
1070 sum_0 = phi <init, sum_1>
1071 S1 x_t = ...
1072 S2 y_t = ...
1073 S3 x_T = (TYPE1) x_t;
1074 S4 y_T = (TYPE1) y_t;
1075 S5 diff = x_T - y_T;
1076 S6 abs_diff = ABS_EXPR <diff>;
1077 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1078 S8 sum_1 = abs_diff + sum_0;
1080 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
  1081    same size as 'TYPE1' or bigger.  This is a special case of a reduction
1082 computation.
1084 Input:
1086 * STMT_VINFO: The stmt from which the pattern search begins. In the
1087 example, when this function is called with S8, the pattern
1088 {S3,S4,S5,S6,S7,S8} will be detected.
1090 Output:
1092 * TYPE_OUT: The type of the output of this pattern.
1094 * Return value: A new stmt that will be used to replace the sequence of
1095 stmts that constitute the pattern. In this case it will be:
1096 SAD_EXPR <x_t, y_t, sum_0>
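/* As an illustrative example (hypothetical code, names made up), assuming
   8-bit "unsigned char" and 32-bit "int", the classic sum-of-absolute-
   differences loop matches this pattern:

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += abs (x[i] - y[i]);	/* operands promoted to int by C */

   The subtraction, ABS_EXPR and accumulation are replaced by a single
   SAD_EXPR on the narrow "unsigned char" vectors.  */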
1099 static gimple *
1100 vect_recog_sad_pattern (vec_info *vinfo,
1101 stmt_vec_info stmt_vinfo, tree *type_out)
1103 gimple *last_stmt = stmt_vinfo->stmt;
1104 tree half_type;
1106 /* Look for the following pattern
1107 DX = (TYPE1) X;
1108 DY = (TYPE1) Y;
1109 DDIFF = DX - DY;
1110 DAD = ABS_EXPR <DDIFF>;
  1111      DAD = (TYPE2) DAD;		#optional
1112 sum_1 = DAD + sum_0;
1113 In which
1114 - DX is at least double the size of X
1115 - DY is at least double the size of Y
1116 - DX, DY, DDIFF, DAD all have the same type
  1117      - sum is the same size as DAD or bigger
1118 - sum has been recognized as a reduction variable.
1120 This is equivalent to:
1121 DDIFF = X w- Y; #widen sub
1122 DAD = ABS_EXPR <DDIFF>;
1123 sum_1 = DAD w+ sum_0; #widen summation
1125 DDIFF = X w- Y; #widen sub
1126 DAD = ABS_EXPR <DDIFF>;
1127 sum_1 = DAD + sum_0; #summation
1130 /* Starting from LAST_STMT, follow the defs of its uses in search
1131 of the above pattern. */
1133 tree plus_oprnd0, plus_oprnd1;
1134 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1135 &plus_oprnd0, &plus_oprnd1))
1136 return NULL;
1138 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1140 /* Any non-truncating sequence of conversions is OK here, since
1141 with a successful match, the result of the ABS(U) is known to fit
1142 within the nonnegative range of the result type. (It cannot be the
1143 negative of the minimum signed value due to the range of the widening
1144 MINUS_EXPR.) */
1145 vect_unpromoted_value unprom_abs;
1146 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1147 &unprom_abs);
1149 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1150 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1151 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1152 Then check that plus_oprnd0 is defined by an abs_expr. */
1154 if (!plus_oprnd0)
1155 return NULL;
1157 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1158 if (!abs_stmt_vinfo)
1159 return NULL;
  1161   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
1162 inside the loop (in case we are analyzing an outer-loop). */
1163 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1164 if (!abs_stmt
1165 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1166 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1167 return NULL;
1169 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1170 tree abs_type = TREE_TYPE (abs_oprnd);
1171 if (TYPE_UNSIGNED (abs_type))
1172 return NULL;
1174 /* Peel off conversions from the ABS input. This can involve sign
1175 changes (e.g. from an unsigned subtraction to a signed ABS input)
1176 or signed promotion, but it can't include unsigned promotion.
1177 (Note that ABS of an unsigned promotion should have been folded
1178 away before now anyway.) */
1179 vect_unpromoted_value unprom_diff;
1180 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1181 &unprom_diff);
1182 if (!abs_oprnd)
1183 return NULL;
1184 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1185 && TYPE_UNSIGNED (unprom_diff.type))
1186 return NULL;
1188 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1189 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1190 if (!diff_stmt_vinfo)
1191 return NULL;
  1193   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
1194 inside the loop (in case we are analyzing an outer-loop). */
1195 vect_unpromoted_value unprom[2];
1196 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1197 false, 2, unprom, &half_type))
1198 return NULL;
1200 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1202 tree half_vectype;
1203 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1204 type_out, &half_vectype))
1205 return NULL;
1207 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1208 tree sad_oprnd[2];
1209 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1210 unprom, half_vectype);
1212 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1213 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1214 sad_oprnd[1], plus_oprnd1);
1216 return pattern_stmt;
1219 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1220 so that it can be treated as though it had the form:
1222 A_TYPE a;
1223 B_TYPE b;
1224 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1225 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1226 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1227 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1228 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1230 Try to replace the pattern with:
1232 A_TYPE a;
1233 B_TYPE b;
1234 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1235 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1236 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1237 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1239 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1241 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1242 name of the pattern being matched, for dump purposes. */
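/* As an illustrative example (hypothetical code, names made up), with
   ORIG_CODE = MULT_EXPR and WIDE_CODE = WIDEN_MULT_EXPR, assuming 16-bit
   "short" and 32-bit "int":

     short a, b;
     int res = (int) a * (int) b;

   is replaced by a single WIDEN_MULT_EXPR on the "short" inputs, provided
   the target supports a widening multiply for that vector mode.  */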
1244 static gimple *
1245 vect_recog_widen_op_pattern (vec_info *vinfo,
1246 stmt_vec_info last_stmt_info, tree *type_out,
1247 tree_code orig_code, tree_code wide_code,
1248 bool shift_p, const char *name)
1250 gimple *last_stmt = last_stmt_info->stmt;
1252 vect_unpromoted_value unprom[2];
1253 tree half_type;
1254 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1255 shift_p, 2, unprom, &half_type))
1256 return NULL;
1258 /* Pattern detected. */
1259 vect_pattern_detected (name, last_stmt);
1261 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1262 tree itype = type;
1263 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1264 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1265 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1266 TYPE_UNSIGNED (half_type));
1268 /* Check target support */
1269 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1270 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1271 tree ctype = itype;
1272 tree vecctype = vecitype;
1273 if (orig_code == MINUS_EXPR
1274 && TYPE_UNSIGNED (itype)
1275 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1277 /* Subtraction is special, even if half_type is unsigned and no matter
1278 whether type is signed or unsigned, if type is wider than itype,
1279 we need to sign-extend from the widening operation result to the
1280 result type.
1281 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1282 itype unsigned short and type either int or unsigned int.
1283 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1284 (unsigned short) 0xffff, but for type int we want the result -1
1285 and for type unsigned int 0xffffffff rather than 0xffff. */
1286 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1287 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1290 enum tree_code dummy_code;
1291 int dummy_int;
1292 auto_vec<tree> dummy_vec;
1293 if (!vectype
1294 || !vecitype
1295 || !vecctype
1296 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1297 vecitype, vectype,
1298 &dummy_code, &dummy_code,
1299 &dummy_int, &dummy_vec))
1300 return NULL;
1302 *type_out = get_vectype_for_scalar_type (vinfo, type);
1303 if (!*type_out)
1304 return NULL;
1306 tree oprnd[2];
1307 vect_convert_inputs (vinfo, last_stmt_info,
1308 2, oprnd, half_type, unprom, vectype);
1310 tree var = vect_recog_temp_ssa_var (itype, NULL);
1311 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1312 oprnd[0], oprnd[1]);
1314 if (vecctype != vecitype)
1315 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1316 pattern_stmt, vecitype);
1318 return vect_convert_output (vinfo, last_stmt_info,
1319 type, pattern_stmt, vecctype);
1322 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1323 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1325 static gimple *
1326 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1327 tree *type_out)
1329 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1330 MULT_EXPR, WIDEN_MULT_EXPR, false,
1331 "vect_recog_widen_mult_pattern");
1334 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1335 to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1337 static gimple *
1338 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1339 tree *type_out)
1341 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1342 PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1343 "vect_recog_widen_plus_pattern");
1346 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1347 to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1348 static gimple *
1349 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1350 tree *type_out)
1352 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1353 MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1354 "vect_recog_widen_minus_pattern");
1357 /* Function vect_recog_popcount_pattern
1359 Try to find the following pattern:
1361 UTYPE1 A;
1362 TYPE1 B;
1363 UTYPE2 temp_in;
1364 TYPE3 temp_out;
1365 temp_in = (UTYPE2)A;
1367 temp_out = __builtin_popcount{,l,ll} (temp_in);
1368 B = (TYPE1) temp_out;
  1370    UTYPE2 may or may not be equal to TYPE3:
  1371    e.g. UTYPE2 is equal to TYPE3 for __builtin_popcount,
  1372    but UTYPE2 is not equal to TYPE3 for __builtin_popcountll.
1374 Input:
1376 * STMT_VINFO: The stmt from which the pattern search begins.
1377 here it starts with B = (TYPE1) temp_out;
1379 Output:
1381 * TYPE_OUT: The vector type of the output of this pattern.
1383 * Return value: A new stmt that will be used to replace the sequence of
1384 stmts that constitute the pattern. In this case it will be:
1385 B = .POPCOUNT (A);
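/* As an illustrative example (hypothetical code, names made up):

     unsigned long long a[N], b[N];
     ...
     b[i] = __builtin_popcountll (a[i]);

   Here the "int" result of the builtin is widened back to 64 bits when it
   is stored, so A, temp_in and B all have 64-bit precision and the whole
   sequence can become a single 64-bit .POPCOUNT, provided the target has
   a vector popcount pattern for that mode.  */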
1388 static gimple *
1389 vect_recog_popcount_pattern (vec_info *vinfo,
1390 stmt_vec_info stmt_vinfo, tree *type_out)
1392 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1393 gimple *popcount_stmt, *pattern_stmt;
1394 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1395 auto_vec<tree> vargs;
1397 /* Find B = (TYPE1) temp_out. */
1398 if (!last_stmt)
1399 return NULL;
1400 tree_code code = gimple_assign_rhs_code (last_stmt);
1401 if (!CONVERT_EXPR_CODE_P (code))
1402 return NULL;
1404 lhs_oprnd = gimple_assign_lhs (last_stmt);
1405 lhs_type = TREE_TYPE (lhs_oprnd);
1406 if (!INTEGRAL_TYPE_P (lhs_type))
1407 return NULL;
1409 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1410 if (TREE_CODE (rhs_oprnd) != SSA_NAME
1411 || !has_single_use (rhs_oprnd))
1412 return NULL;
1413 popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1415 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
1416 if (!is_gimple_call (popcount_stmt))
1417 return NULL;
1418 switch (gimple_call_combined_fn (popcount_stmt))
1420 CASE_CFN_POPCOUNT:
1421 break;
1422 default:
1423 return NULL;
1426 if (gimple_call_num_args (popcount_stmt) != 1)
1427 return NULL;
1429 rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1430 vect_unpromoted_value unprom_diff;
1431 rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1432 &unprom_diff);
1434 if (!rhs_origin)
1435 return NULL;
1437 /* Input and output of .POPCOUNT should be same-precision integer.
1438 Also A should be unsigned or same precision as temp_in,
1439 otherwise there would be sign_extend from A to temp_in. */
1440 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1441 || (!TYPE_UNSIGNED (unprom_diff.type)
1442 && (TYPE_PRECISION (unprom_diff.type)
1443 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1444 return NULL;
1445 vargs.safe_push (unprom_diff.op);
  1447   vect_pattern_detected ("vect_recog_popcount_pattern", popcount_stmt);
1448 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1449 /* Do it only if the backend has popcount<vector_mode>2 pattern. */
1450 if (!vec_type
1451 || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1452 OPTIMIZE_FOR_SPEED))
1453 return NULL;
1455 /* Create B = .POPCOUNT (A). */
1456 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1457 pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1458 gimple_call_set_lhs (pattern_stmt, new_var);
1459 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1460 *type_out = vec_type;
1462 if (dump_enabled_p ())
1463 dump_printf_loc (MSG_NOTE, vect_location,
1464 "created pattern stmt: %G", pattern_stmt);
1465 return pattern_stmt;
1468 /* Function vect_recog_pow_pattern
1470 Try to find the following pattern:
1472 x = POW (y, N);
1474 with POW being one of pow, powf, powi, powif and N being
1475 either 2 or 0.5.
1477 Input:
1479 * STMT_VINFO: The stmt from which the pattern search begins.
1481 Output:
1483 * TYPE_OUT: The type of the output of this pattern.
1485 * Return value: A new stmt that will be used to replace the sequence of
1486 stmts that constitute the pattern. In this case it will be:
1487 x = x * x
1489 x = sqrt (x)
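/* As an illustrative example (hypothetical code, names made up):

     double x[N], y[N];
     ...
     x[i] = pow (y[i], 2.0);	/* becomes y[i] * y[i] */
     x[i] = pow (y[i], 0.5);	/* becomes a (vector) square root */

   so neither loop needs a vectorized pow.  */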
1492 static gimple *
1493 vect_recog_pow_pattern (vec_info *vinfo,
1494 stmt_vec_info stmt_vinfo, tree *type_out)
1496 gimple *last_stmt = stmt_vinfo->stmt;
1497 tree base, exp;
1498 gimple *stmt;
1499 tree var;
1501 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1502 return NULL;
1504 switch (gimple_call_combined_fn (last_stmt))
1506 CASE_CFN_POW:
1507 CASE_CFN_POWI:
1508 break;
1510 default:
1511 return NULL;
1514 base = gimple_call_arg (last_stmt, 0);
1515 exp = gimple_call_arg (last_stmt, 1);
1516 if (TREE_CODE (exp) != REAL_CST
1517 && TREE_CODE (exp) != INTEGER_CST)
1519 if (flag_unsafe_math_optimizations
1520 && TREE_CODE (base) == REAL_CST
1521 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1523 combined_fn log_cfn;
1524 built_in_function exp_bfn;
1525 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1527 case BUILT_IN_POW:
1528 log_cfn = CFN_BUILT_IN_LOG;
1529 exp_bfn = BUILT_IN_EXP;
1530 break;
1531 case BUILT_IN_POWF:
1532 log_cfn = CFN_BUILT_IN_LOGF;
1533 exp_bfn = BUILT_IN_EXPF;
1534 break;
1535 case BUILT_IN_POWL:
1536 log_cfn = CFN_BUILT_IN_LOGL;
1537 exp_bfn = BUILT_IN_EXPL;
1538 break;
1539 default:
1540 return NULL;
1542 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1543 tree exp_decl = builtin_decl_implicit (exp_bfn);
1544 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1545 does that, but if C is a power of 2, we want to use
1546 exp2 (log2 (C) * x) in the non-vectorized version, but for
1547 vectorization we don't have vectorized exp2. */
1548 if (logc
1549 && TREE_CODE (logc) == REAL_CST
1550 && exp_decl
1551 && lookup_attribute ("omp declare simd",
1552 DECL_ATTRIBUTES (exp_decl)))
1554 cgraph_node *node = cgraph_node::get_create (exp_decl);
1555 if (node->simd_clones == NULL)
1557 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1558 || node->definition)
1559 return NULL;
1560 expand_simd_clones (node);
1561 if (node->simd_clones == NULL)
1562 return NULL;
1564 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1565 if (!*type_out)
1566 return NULL;
1567 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1568 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1569 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1570 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1571 g = gimple_build_call (exp_decl, 1, def);
1572 gimple_call_set_lhs (g, res);
1573 return g;
1577 return NULL;
1580 /* We now have a pow or powi builtin function call with a constant
1581 exponent. */
1583 /* Catch squaring. */
1584 if ((tree_fits_shwi_p (exp)
1585 && tree_to_shwi (exp) == 2)
1586 || (TREE_CODE (exp) == REAL_CST
1587 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1589 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1590 TREE_TYPE (base), type_out))
1591 return NULL;
1593 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1594 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1595 return stmt;
1598 /* Catch square root. */
1599 if (TREE_CODE (exp) == REAL_CST
1600 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1602 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1603 if (*type_out
1604 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1605 OPTIMIZE_FOR_SPEED))
1607 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1608 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1609 gimple_call_set_lhs (stmt, var);
1610 gimple_call_set_nothrow (stmt, true);
1611 return stmt;
1615 return NULL;
1619 /* Function vect_recog_widen_sum_pattern
1621 Try to find the following pattern:
1623 type x_t;
1624 TYPE x_T, sum = init;
1625 loop:
1626 sum_0 = phi <init, sum_1>
1627 S1 x_t = *p;
1628 S2 x_T = (TYPE) x_t;
1629 S3 sum_1 = x_T + sum_0;
  1631    where type 'TYPE' is at least double the size of type 'type', i.e. we're
1632 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1633 a special case of a reduction computation.
1635 Input:
1637 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1638 when this function is called with S3, the pattern {S2,S3} will be detected.
1640 Output:
1642 * TYPE_OUT: The type of the output of this pattern.
1644 * Return value: A new stmt that will be used to replace the sequence of
1645 stmts that constitute the pattern. In this case it will be:
1646 WIDEN_SUM <x_t, sum_0>
1648 Note: The widening-sum idiom is a widening reduction pattern that is
1649 vectorized without preserving all the intermediate results. It
1650 produces only N/2 (widened) results (by summing up pairs of
1651 intermediate results) rather than all N results. Therefore, we
1652 cannot allow this pattern when we want to get all the results and in
1653 the correct order (as is the case when this computation is in an
  1654    inner-loop nested in an outer-loop that is being vectorized).  */
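/* As an illustrative example (hypothetical code, names made up), assuming
   16-bit "short" and 32-bit "int":

     short x[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += x[i];	/* x[i] promoted to int by C */

   The promotion and addition are replaced by WIDEN_SUM <x_t, sum_0> when
   the target provides a widening-sum optab.  */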
1656 static gimple *
1657 vect_recog_widen_sum_pattern (vec_info *vinfo,
1658 stmt_vec_info stmt_vinfo, tree *type_out)
1660 gimple *last_stmt = stmt_vinfo->stmt;
1661 tree oprnd0, oprnd1;
1662 tree type;
1663 gimple *pattern_stmt;
1664 tree var;
1666 /* Look for the following pattern
1667 DX = (TYPE) X;
1668 sum_1 = DX + sum_0;
1669 In which DX is at least double the size of X, and sum_1 has been
1670 recognized as a reduction variable.
1673 /* Starting from LAST_STMT, follow the defs of its uses in search
1674 of the above pattern. */
1676 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1677 &oprnd0, &oprnd1))
1678 return NULL;
1680 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1682 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1683 we know that oprnd1 is the reduction variable (defined by a loop-header
1684 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1685 Left to check that oprnd0 is defined by a cast from type 'type' to type
1686 'TYPE'. */
1688 vect_unpromoted_value unprom0;
1689 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1690 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1691 return NULL;
1693 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1695 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1696 unprom0.type, type_out))
1697 return NULL;
1699 var = vect_recog_temp_ssa_var (type, NULL);
1700 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1702 return pattern_stmt;
1705 /* Recognize cases in which an operation is performed in one type WTYPE
1706 but could be done more efficiently in a narrower type NTYPE. For example,
1707 if we have:
1709 ATYPE a; // narrower than NTYPE
1710 BTYPE b; // narrower than NTYPE
1711 WTYPE aw = (WTYPE) a;
1712 WTYPE bw = (WTYPE) b;
1713 WTYPE res = aw + bw; // only uses of aw and bw
1715 then it would be more efficient to do:
1717 NTYPE an = (NTYPE) a;
1718 NTYPE bn = (NTYPE) b;
1719 NTYPE resn = an + bn;
1720 WTYPE res = (WTYPE) resn;
1722 Other situations include things like:
1724 ATYPE a; // NTYPE or narrower
1725 WTYPE aw = (WTYPE) a;
1726 WTYPE res = aw + b;
1728 when only "(NTYPE) res" is significant. In that case it's more efficient
1729 to truncate "b" and do the operation on NTYPE instead:
1731 NTYPE an = (NTYPE) a;
1732 NTYPE bn = (NTYPE) b; // truncation
1733 NTYPE resn = an + bn;
1734 WTYPE res = (WTYPE) resn;
1736 All users of "res" should then use "resn" instead, making the final
1737 statement dead (not marked as relevant). The final statement is still
1738 needed to maintain the type correctness of the IR.
1740 vect_determine_precisions has already determined the minimum
1741 precision of the operation and the minimum precision required
1742 by users of the result. */
1744 static gimple *
1745 vect_recog_over_widening_pattern (vec_info *vinfo,
1746 stmt_vec_info last_stmt_info, tree *type_out)
1748 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1749 if (!last_stmt)
1750 return NULL;
1752 /* See whether we have found that this operation can be done on a
1753 narrower type without changing its semantics. */
1754 unsigned int new_precision = last_stmt_info->operation_precision;
1755 if (!new_precision)
1756 return NULL;
1758 tree lhs = gimple_assign_lhs (last_stmt);
1759 tree type = TREE_TYPE (lhs);
1760 tree_code code = gimple_assign_rhs_code (last_stmt);
1762 /* Punt for reductions where we don't handle the type conversions. */
1763 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1764 return NULL;
1766 /* Keep the first operand of a COND_EXPR as-is: only the other two
1767 operands are interesting. */
1768 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1770 /* Check the operands. */
1771 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1772 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1773 unprom.quick_grow (nops);
1774 unsigned int min_precision = 0;
1775 bool single_use_p = false;
1776 for (unsigned int i = 0; i < nops; ++i)
1778 tree op = gimple_op (last_stmt, first_op + i);
1779 if (TREE_CODE (op) == INTEGER_CST)
1780 unprom[i].set_op (op, vect_constant_def);
1781 else if (TREE_CODE (op) == SSA_NAME)
1783 bool op_single_use_p = true;
1784 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1785 &op_single_use_p))
1786 return NULL;
1787 /* If:
1789 (1) N bits of the result are needed;
1790 (2) all inputs are widened from M<N bits; and
1791 (3) one operand OP is a single-use SSA name
1793 we can shift the M->N widening from OP to the output
1794 without changing the number or type of extensions involved.
1795 This then reduces the number of copies of STMT_INFO.
1797 If instead of (3) more than one operand is a single-use SSA name,
1798 shifting the extension to the output is even more of a win.
1800 If instead:
1802 (1) N bits of the result are needed;
1803 (2) one operand OP2 is widened from M2<N bits;
1804 (3) another operand OP1 is widened from M1<M2 bits; and
1805 (4) both OP1 and OP2 are single-use
1807 the choice is between:
1809 (a) truncating OP2 to M1, doing the operation on M1,
1810 and then widening the result to N
1812 (b) widening OP1 to M2, doing the operation on M2, and then
1813 widening the result to N
1815 Both shift the M2->N widening of the inputs to the output.
1816 (a) additionally shifts the M1->M2 widening to the output;
1817 it requires fewer copies of STMT_INFO but requires an extra
1818 M2->M1 truncation.
1820 Which is better will depend on the complexity and cost of
1821 STMT_INFO, which is hard to predict at this stage. However,
1822 a clear tie-breaker in favor of (b) is the fact that the
1823 truncation in (a) increases the length of the operation chain.
1825 If instead of (4) only one of OP1 or OP2 is single-use,
1826 (b) is still a win over doing the operation in N bits:
1827 it still shifts the M2->N widening on the single-use operand
1828 to the output and reduces the number of STMT_INFO copies.
1830 If neither operand is single-use then operating on fewer than
1831 N bits might lead to more extensions overall. Whether it does
1832 or not depends on global information about the vectorization
1833 region, and whether that's a good trade-off would again
1834 depend on the complexity and cost of the statements involved,
1835 as well as things like register pressure that are not normally
1836 modelled at this stage. We therefore ignore these cases
1837 and just optimize the clear single-use wins above.
1839 Thus we take the maximum precision of the unpromoted operands
1840 and record whether any operand is single-use. */
1841 if (unprom[i].dt == vect_internal_def)
1843 min_precision = MAX (min_precision,
1844 TYPE_PRECISION (unprom[i].type));
1845 single_use_p |= op_single_use_p;
1848 else
1849 return NULL;
1852 /* Although the operation could be done in operation_precision, we have
1853 to balance that against introducing extra truncations or extensions.
1854 Calculate the minimum precision that can be handled efficiently.
1856 The loop above determined that the operation could be handled
1857 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1858 extension from the inputs to the output without introducing more
1859 instructions, and would reduce the number of instructions required
1860 for STMT_INFO itself.
1862 vect_determine_precisions has also determined that the result only
1863 needs min_output_precision bits. Truncating by a factor of N times
1864 requires a tree of N - 1 instructions, so if TYPE is N times wider
1865 than min_output_precision, doing the operation in TYPE and truncating
1866 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1867 In contrast:
1869 - truncating the input to a unary operation and doing the operation
1870 in the new type requires at most N - 1 + 1 = N instructions per
1871 output vector
1873 - doing the same for a binary operation requires at most
1874 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1876 Both unary and binary operations require fewer instructions than
1877 this if the operands were extended from a suitable truncated form.
1878 Thus there is usually nothing to lose by doing operations in
1879 min_output_precision bits, but there can be something to gain. */
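/* Worked example (illustrative): with N = 2, e.g. a 32-bit TYPE and a
   16-bit min_output_precision, doing the operation in TYPE costs
   2 copies of the operation plus 1 pack = 3 instructions per output
   vector; truncating the input of a unary operation first costs
   1 pack + 1 operation = 2, and truncating both inputs of a binary
   operation costs 2 packs + 1 operation = 3, i.e. never worse.  */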
1880 if (!single_use_p)
1881 min_precision = last_stmt_info->min_output_precision;
1882 else
1883 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1885 /* Apply the minimum efficient precision we just calculated. */
1886 if (new_precision < min_precision)
1887 new_precision = min_precision;
1888 new_precision = vect_element_precision (new_precision);
1889 if (new_precision >= TYPE_PRECISION (type))
1890 return NULL;
1892 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1894 *type_out = get_vectype_for_scalar_type (vinfo, type);
1895 if (!*type_out)
1896 return NULL;
1898 /* We've found a viable pattern. Get the new type of the operation. */
1899 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1900 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1902 /* If we're truncating an operation, we need to make sure that we
1903 don't introduce new undefined overflow. The codes tested here are
1904 a subset of those accepted by vect_truncatable_operation_p. */
1905 tree op_type = new_type;
1906 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1907 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1908 op_type = build_nonstandard_integer_type (new_precision, true);
1910 /* We specifically don't check here whether the target supports the
1911 new operation, since it might be something that a later pattern
1912 wants to rewrite anyway. If targets have a minimum element size
1913 for some optabs, we should pattern-match smaller ops to larger ops
1914 where beneficial. */
1915 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1916 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
1917 if (!new_vectype || !op_vectype)
1918 return NULL;
1920 if (dump_enabled_p ())
1921 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1922 type, new_type);
1924 /* Calculate the rhs operands for an operation on OP_TYPE. */
1925 tree ops[3] = {};
1926 for (unsigned int i = 1; i < first_op; ++i)
1927 ops[i - 1] = gimple_op (last_stmt, i);
1928 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
1929 op_type, &unprom[0], op_vectype);
1931 /* Use the operation to produce a result of type OP_TYPE. */
1932 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1933 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1934 ops[0], ops[1], ops[2]);
1935 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1937 if (dump_enabled_p ())
1938 dump_printf_loc (MSG_NOTE, vect_location,
1939 "created pattern stmt: %G", pattern_stmt);
1941 /* Convert back to the original signedness, if OP_TYPE is different
1942 from NEW_TYPE. */
1943 if (op_type != new_type)
1944 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
1945 pattern_stmt, op_vectype);
1947 /* Promote the result to the original type. */
1948 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
1949 pattern_stmt, new_vectype);
1951 return pattern_stmt;
1954 /* Recognize the following patterns:
1956 ATYPE a; // narrower than TYPE
1957 BTYPE b; // narrower than TYPE
1959 1) Multiply high with scaling
1960 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1961 Here, c is bitsize (TYPE) / 2 - 1.
1963 2) ... or also with rounding
1964 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1965 Here, d is bitsize (TYPE) / 2 - 2.
1967 3) Normal multiply high
1968 TYPE res = ((TYPE) a * (TYPE) b) >> e;
1969 Here, e is bitsize (TYPE) / 2.
1971 where only the bottom half of res is used. */
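/* As a concrete instance (illustrative): with 16-bit a and b and a 32-bit
   TYPE, pattern 1) uses c = 15, pattern 2) uses d = 14 and pattern 3) uses
   e = 16, matching the IFN_MULHS, IFN_MULHRS and IFN_MULH checks below
   (scale + precision + 1, + 2 and + 0 respectively must equal 32 when the
   multiplication is done on 16-bit inputs).  */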
1973 static gimple *
1974 vect_recog_mulhs_pattern (vec_info *vinfo,
1975 stmt_vec_info last_stmt_info, tree *type_out)
1977 /* Check for a right shift. */
1978 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1979 if (!last_stmt
1980 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1981 return NULL;
1983 /* Check that the shift result is wider than the users of the
1984 result need (i.e. that narrowing would be a natural choice). */
1985 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1986 unsigned int target_precision
1987 = vect_element_precision (last_stmt_info->min_output_precision);
1988 if (!INTEGRAL_TYPE_P (lhs_type)
1989 || target_precision >= TYPE_PRECISION (lhs_type))
1990 return NULL;
1992 /* Look through any change in sign on the outer shift input. */
1993 vect_unpromoted_value unprom_rshift_input;
1994 tree rshift_input = vect_look_through_possible_promotion
1995 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1996 if (!rshift_input
1997 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1998 != TYPE_PRECISION (lhs_type))
1999 return NULL;
2001 /* Get the definition of the shift input. */
2002 stmt_vec_info rshift_input_stmt_info
2003 = vect_get_internal_def (vinfo, rshift_input);
2004 if (!rshift_input_stmt_info)
2005 return NULL;
2006 gassign *rshift_input_stmt
2007 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
2008 if (!rshift_input_stmt)
2009 return NULL;
2011 stmt_vec_info mulh_stmt_info;
2012 tree scale_term;
2013 bool rounding_p = false;
2015 /* Check for the presence of the rounding term. */
2016 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
2018 /* Check that the outer shift was by 1. */
2019 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
2020 return NULL;
2022 /* Check that the second operand of the PLUS_EXPR is 1. */
2023 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2024 return NULL;
2026 /* Look through any change in sign on the addition input. */
2027 vect_unpromoted_value unprom_plus_input;
2028 tree plus_input = vect_look_through_possible_promotion
2029 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2030 if (!plus_input
2031 || TYPE_PRECISION (TREE_TYPE (plus_input))
2032 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
2033 return NULL;
2035 /* Get the definition of the multiply-high-scale part. */
2036 stmt_vec_info plus_input_stmt_info
2037 = vect_get_internal_def (vinfo, plus_input);
2038 if (!plus_input_stmt_info)
2039 return NULL;
2040 gassign *plus_input_stmt
2041 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2042 if (!plus_input_stmt
2043 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2044 return NULL;
2046 /* Look through any change in sign on the scaling input. */
2047 vect_unpromoted_value unprom_scale_input;
2048 tree scale_input = vect_look_through_possible_promotion
2049 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2050 if (!scale_input
2051 || TYPE_PRECISION (TREE_TYPE (scale_input))
2052 != TYPE_PRECISION (TREE_TYPE (plus_input)))
2053 return NULL;
2055 /* Get the definition of the multiply-high part. */
2056 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2057 if (!mulh_stmt_info)
2058 return NULL;
2060 /* Get the scaling term. */
2061 scale_term = gimple_assign_rhs2 (plus_input_stmt);
2062 rounding_p = true;
2064 else
2066 mulh_stmt_info = rshift_input_stmt_info;
2067 scale_term = gimple_assign_rhs2 (last_stmt);
2070 /* Check that the scaling factor is constant. */
2071 if (TREE_CODE (scale_term) != INTEGER_CST)
2072 return NULL;
2074 /* Check whether the scaling input term can be seen as two widened
2075 inputs multiplied together. */
2076 vect_unpromoted_value unprom_mult[2];
2077 tree new_type;
2078 unsigned int nops
2079 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2080 false, 2, unprom_mult, &new_type);
2081 if (nops != 2)
2082 return NULL;
2084 /* Adjust output precision. */
2085 if (TYPE_PRECISION (new_type) < target_precision)
2086 new_type = build_nonstandard_integer_type
2087 (target_precision, TYPE_UNSIGNED (new_type));
2089 unsigned mult_precision = TYPE_PRECISION (new_type);
2090 internal_fn ifn;
2091 /* Check that the scaling factor has the expected value.  Instead of
2092 target_precision, we use the precision of NEW_TYPE, i.e. the one
2093 that is actually used for the internal function.  */
2094 if (rounding_p)
2096 /* Check pattern 2). */
2097 if (wi::to_widest (scale_term) + mult_precision + 2
2098 != TYPE_PRECISION (lhs_type))
2099 return NULL;
2101 ifn = IFN_MULHRS;
2103 else
2105 /* Check for pattern 1). */
2106 if (wi::to_widest (scale_term) + mult_precision + 1
2107 == TYPE_PRECISION (lhs_type))
2108 ifn = IFN_MULHS;
2109 /* Check for pattern 3). */
2110 else if (wi::to_widest (scale_term) + mult_precision
2111 == TYPE_PRECISION (lhs_type))
2112 ifn = IFN_MULH;
2113 else
2114 return NULL;
2117 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2119 /* Check for target support. */
2120 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2121 if (!new_vectype
2122 || !direct_internal_fn_supported_p
2123 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2124 return NULL;
2126 /* The IR requires a valid vector type for the cast result, even though
2127 it's likely to be discarded. */
2128 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2129 if (!*type_out)
2130 return NULL;
2132 /* Generate the IFN_MULH, IFN_MULHS or IFN_MULHRS call. */
2133 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2134 tree new_ops[2];
2135 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2136 unprom_mult, new_vectype);
2137 gcall *mulhrs_stmt
2138 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
2139 gimple_call_set_lhs (mulhrs_stmt, new_var);
2140 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
2142 if (dump_enabled_p ())
2143 dump_printf_loc (MSG_NOTE, vect_location,
2144 "created pattern stmt: %G", mulhrs_stmt);
2146 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
2147 mulhrs_stmt, new_vectype);
2150 /* Recognize the patterns:
2152 ATYPE a; // narrower than TYPE
2153 BTYPE b; // narrower than TYPE
2154 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2155 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2157 where only the bottom half of avg is used. Try to transform them into:
2159 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2160 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2162 followed by:
2164 TYPE avg = (TYPE) avg';
2166 where NTYPE is no wider than half of TYPE. Since only the bottom half
2167 of avg is used, all or part of the cast of avg' should become redundant.
2169 If there is no target support available, generate code to distribute rshift
2170 over plus and add a carry. */
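/* For example (illustrative only; the function name is hypothetical),
   a rounding average of two byte arrays:

     void
     blend (unsigned char *r, const unsigned char *a,
            const unsigned char *b, int n)
     {
       for (int i = 0; i < n; i++)
         r[i] = (a[i] + b[i] + 1) >> 1;   // form (2), becomes .AVG_CEIL
     }

   Here the additions are done in int but only the low 8 bits of the
   result are stored, so the whole computation can stay in 8-bit lanes.  */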
2172 static gimple *
2173 vect_recog_average_pattern (vec_info *vinfo,
2174 stmt_vec_info last_stmt_info, tree *type_out)
2176 /* Check for a shift right by one bit. */
2177 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2178 if (!last_stmt
2179 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
2180 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
2181 return NULL;
2183 /* Check that the shift result is wider than the users of the
2184 result need (i.e. that narrowing would be a natural choice). */
2185 tree lhs = gimple_assign_lhs (last_stmt);
2186 tree type = TREE_TYPE (lhs);
2187 unsigned int target_precision
2188 = vect_element_precision (last_stmt_info->min_output_precision);
2189 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2190 return NULL;
2192 /* Look through any change in sign on the shift input. */
2193 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2194 vect_unpromoted_value unprom_plus;
2195 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2196 &unprom_plus);
2197 if (!rshift_rhs
2198 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2199 return NULL;
2201 /* Get the definition of the shift input. */
2202 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2203 if (!plus_stmt_info)
2204 return NULL;
2206 /* Check whether the shift input can be seen as a tree of additions on
2207 2 or 3 widened inputs.
2209 Note that the pattern should be a win even if the result of one or
2210 more additions is reused elsewhere: if the pattern matches, we'd be
2211 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2212 internal_fn ifn = IFN_AVG_FLOOR;
2213 vect_unpromoted_value unprom[3];
2214 tree new_type;
2215 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2216 WIDEN_PLUS_EXPR, false, 3,
2217 unprom, &new_type);
2218 if (nops == 0)
2219 return NULL;
2220 if (nops == 3)
2222 /* Check that one operand is 1. */
2223 unsigned int i;
2224 for (i = 0; i < 3; ++i)
2225 if (integer_onep (unprom[i].op))
2226 break;
2227 if (i == 3)
2228 return NULL;
2229 /* Throw away the 1 operand and keep the other two. */
2230 if (i < 2)
2231 unprom[i] = unprom[2];
2232 ifn = IFN_AVG_CEIL;
2235 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2237 /* We know that:
2239 (a) the operation can be viewed as:
2241 TYPE widened0 = (TYPE) UNPROM[0];
2242 TYPE widened1 = (TYPE) UNPROM[1];
2243 TYPE tmp1 = widened0 + widened1 {+ 1};
2244 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2246 (b) the first two statements are equivalent to:
2248 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2249 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2251 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2252 where sensible;
2254 (d) all the operations can be performed correctly at twice the width of
2255 NEW_TYPE, due to the nature of the average operation; and
2257 (e) users of the result of the right shift need only TARGET_PRECISION
2258 bits, where TARGET_PRECISION is no more than half of TYPE's
2259 precision.
2261 Under these circumstances, the only situation in which NEW_TYPE
2262 could be narrower than TARGET_PRECISION is if widened0, widened1
2263 and an addition result are all used more than once. Thus we can
2264 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2265 as "free", whereas widening the result of the average instruction
2266 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2267 therefore better not to go narrower than TARGET_PRECISION. */
2268 if (TYPE_PRECISION (new_type) < target_precision)
2269 new_type = build_nonstandard_integer_type (target_precision,
2270 TYPE_UNSIGNED (new_type));
2272 /* Check for target support. */
2273 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2274 if (!new_vectype)
2275 return NULL;
2277 bool fallback_p = false;
2279 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2281 else if (TYPE_UNSIGNED (new_type)
2282 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2283 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2284 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2285 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2286 fallback_p = true;
2287 else
2288 return NULL;
2290 /* The IR requires a valid vector type for the cast result, even though
2291 it's likely to be discarded. */
2292 *type_out = get_vectype_for_scalar_type (vinfo, type);
2293 if (!*type_out)
2294 return NULL;
2296 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2297 tree new_ops[2];
2298 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2299 unprom, new_vectype);
2301 if (fallback_p)
2303 /* As a fallback, generate code for the following sequence:
2305 shifted_op0 = new_ops[0] >> 1;
2306 shifted_op1 = new_ops[1] >> 1;
2307 sum_of_shifted = shifted_op0 + shifted_op1;
2308 unmasked_carry = new_ops[0] and/or new_ops[1];
2309 carry = unmasked_carry & 1;
2310 new_var = sum_of_shifted + carry;
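This relies on the identities (for unsigned x, y):
  (x + y) >> 1      ==  (x >> 1) + (y >> 1) + ((x & y) & 1)
  (x + y + 1) >> 1  ==  (x >> 1) + (y >> 1) + ((x | y) & 1)
so the sum never has to be formed in a wider type; the carry is
recovered from the discarded low bits (BIT_AND for .AVG_FLOOR,
BIT_IOR for .AVG_CEIL).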
2313 tree one_cst = build_one_cst (new_type);
2314 gassign *g;
2316 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2317 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2318 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2320 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2321 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2322 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2324 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2325 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2326 shifted_op0, shifted_op1);
2327 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2329 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2330 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2331 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2332 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2334 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2335 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2336 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2338 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2339 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2342 /* Generate the IFN_AVG* call. */
2343 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2344 new_ops[1]);
2345 gimple_call_set_lhs (average_stmt, new_var);
2346 gimple_set_location (average_stmt, gimple_location (last_stmt));
2348 if (dump_enabled_p ())
2349 dump_printf_loc (MSG_NOTE, vect_location,
2350 "created pattern stmt: %G", average_stmt);
2352 return vect_convert_output (vinfo, last_stmt_info,
2353 type, average_stmt, new_vectype);
2356 /* Recognize cases in which the input to a cast is wider than its
2357 output, and the input is fed by a widening operation. Fold this
2358 by removing the unnecessary intermediate widening. E.g.:
2360 unsigned char a;
2361 unsigned int b = (unsigned int) a;
2362 unsigned short c = (unsigned short) b;
2366 unsigned short c = (unsigned short) a;
2368 Although this is rare in input IR, it is an expected side-effect
2369 of the over-widening pattern above.
2371 This is beneficial also for integer-to-float conversions, if the
2372 widened integer has more bits than the float, and if the unwidened
2373 input doesn't. */
2375 static gimple *
2376 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2377 stmt_vec_info last_stmt_info, tree *type_out)
2379 /* Check for a cast, including an integer-to-float conversion. */
2380 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2381 if (!last_stmt)
2382 return NULL;
2383 tree_code code = gimple_assign_rhs_code (last_stmt);
2384 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2385 return NULL;
2387 /* Make sure that the rhs is a scalar with a natural bitsize. */
2388 tree lhs = gimple_assign_lhs (last_stmt);
2389 if (!lhs)
2390 return NULL;
2391 tree lhs_type = TREE_TYPE (lhs);
2392 scalar_mode lhs_mode;
2393 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2394 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2395 return NULL;
2397 /* Check for a narrowing operation (from a vector point of view). */
2398 tree rhs = gimple_assign_rhs1 (last_stmt);
2399 tree rhs_type = TREE_TYPE (rhs);
2400 if (!INTEGRAL_TYPE_P (rhs_type)
2401 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2402 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2403 return NULL;
2405 /* Try to find an unpromoted input. */
2406 vect_unpromoted_value unprom;
2407 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2408 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2409 return NULL;
2411 /* If the bits above RHS_TYPE matter, make sure that they're the
2412 same when extending from UNPROM as they are when extending from RHS. */
2413 if (!INTEGRAL_TYPE_P (lhs_type)
2414 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2415 return NULL;
2417 /* We can get the same result by casting UNPROM directly, to avoid
2418 the unnecessary widening and narrowing. */
2419 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2421 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2422 if (!*type_out)
2423 return NULL;
2425 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2426 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2427 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2429 return pattern_stmt;
2432 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2433 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2435 static gimple *
2436 vect_recog_widen_shift_pattern (vec_info *vinfo,
2437 stmt_vec_info last_stmt_info, tree *type_out)
2439 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2440 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2441 "vect_recog_widen_shift_pattern");
2444 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2446 type a_t, b_t, c_t;
2448 S0 a_t = b_t r<< c_t;
2450 Input/Output:
2452 * STMT_VINFO: The stmt from which the pattern search begins,
2453 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2454 with a sequence:
2456 S1 d_t = -c_t;
2457 S2 e_t = d_t & (B - 1);
2458 S3 f_t = b_t << c_t;
2459 S4 g_t = b_t >> e_t;
2460 S0 a_t = f_t | g_t;
2462 where B is element bitsize of type.
2464 Output:
2466 * TYPE_OUT: The type of the output of this pattern.
2468 * Return value: A new stmt that will be used to replace the rotate
2469 S0 stmt. */
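/* As an illustration (not from the surrounding code): for a 32-bit
   unsigned b_t this emits the familiar open-coded rotate

     a_t = (b_t << c_t) | (b_t >> (-c_t & 31));

   using the negate-and-mask form for the second shift count so that the
   shift amount stays in [0, 31] even when c_t is 0.  */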
2471 static gimple *
2472 vect_recog_rotate_pattern (vec_info *vinfo,
2473 stmt_vec_info stmt_vinfo, tree *type_out)
2475 gimple *last_stmt = stmt_vinfo->stmt;
2476 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2477 gimple *pattern_stmt, *def_stmt;
2478 enum tree_code rhs_code;
2479 enum vect_def_type dt;
2480 optab optab1, optab2;
2481 edge ext_def = NULL;
2482 bool bswap16_p = false;
2484 if (is_gimple_assign (last_stmt))
2486 rhs_code = gimple_assign_rhs_code (last_stmt);
2487 switch (rhs_code)
2489 case LROTATE_EXPR:
2490 case RROTATE_EXPR:
2491 break;
2492 default:
2493 return NULL;
2496 lhs = gimple_assign_lhs (last_stmt);
2497 oprnd0 = gimple_assign_rhs1 (last_stmt);
2498 type = TREE_TYPE (oprnd0);
2499 oprnd1 = gimple_assign_rhs2 (last_stmt);
2501 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2503 /* __builtin_bswap16 (x) is another form of x r>> 8.
2504 The vectorizer has bswap support, but only if the argument isn't
2505 promoted. */
2506 lhs = gimple_call_lhs (last_stmt);
2507 oprnd0 = gimple_call_arg (last_stmt, 0);
2508 type = TREE_TYPE (oprnd0);
2509 if (!lhs
2510 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2511 || TYPE_PRECISION (type) <= 16
2512 || TREE_CODE (oprnd0) != SSA_NAME
2513 || BITS_PER_UNIT != 8
2514 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2515 return NULL;
2517 stmt_vec_info def_stmt_info;
2518 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2519 return NULL;
2521 if (dt != vect_internal_def)
2522 return NULL;
2524 if (gimple_assign_cast_p (def_stmt))
2526 def = gimple_assign_rhs1 (def_stmt);
2527 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2528 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2529 oprnd0 = def;
2532 type = TREE_TYPE (lhs);
2533 vectype = get_vectype_for_scalar_type (vinfo, type);
2534 if (vectype == NULL_TREE)
2535 return NULL;
2537 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2539 /* The encoding uses one stepped pattern for each byte in the
2540 16-bit word. */
2541 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2542 for (unsigned i = 0; i < 3; ++i)
2543 for (unsigned j = 0; j < 2; ++j)
2544 elts.quick_push ((i + 1) * 2 - j - 1);
2546 vec_perm_indices indices (elts, 1,
2547 TYPE_VECTOR_SUBPARTS (char_vectype));
2548 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2550 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2551 undo the argument promotion. */
2552 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2554 def = vect_recog_temp_ssa_var (type, NULL);
2555 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2556 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2557 oprnd0 = def;
2560 /* Pattern detected. */
2561 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2563 *type_out = vectype;
2565 /* Pattern supported. Create a stmt to be used to replace the
2566 pattern, with the unpromoted argument. */
2567 var = vect_recog_temp_ssa_var (type, NULL);
2568 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2569 1, oprnd0);
2570 gimple_call_set_lhs (pattern_stmt, var);
2571 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2572 gimple_call_fntype (last_stmt));
2573 return pattern_stmt;
2577 oprnd1 = build_int_cst (integer_type_node, 8);
2578 rhs_code = LROTATE_EXPR;
2579 bswap16_p = true;
2581 else
2582 return NULL;
2584 if (TREE_CODE (oprnd0) != SSA_NAME
2585 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2586 || !INTEGRAL_TYPE_P (type)
2587 || !TYPE_UNSIGNED (type))
2588 return NULL;
2590 stmt_vec_info def_stmt_info;
2591 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2592 return NULL;
2594 if (dt != vect_internal_def
2595 && dt != vect_constant_def
2596 && dt != vect_external_def)
2597 return NULL;
2599 vectype = get_vectype_for_scalar_type (vinfo, type);
2600 if (vectype == NULL_TREE)
2601 return NULL;
2603 /* If vector/vector or vector/scalar rotate is supported by the target,
2604 don't do anything here. */
2605 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2606 if (optab1
2607 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2609 use_rotate:
2610 if (bswap16_p)
2612 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2614 def = vect_recog_temp_ssa_var (type, NULL);
2615 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2616 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2617 oprnd0 = def;
2620 /* Pattern detected. */
2621 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2623 *type_out = vectype;
2625 /* Pattern supported. Create a stmt to be used to replace the
2626 pattern. */
2627 var = vect_recog_temp_ssa_var (type, NULL);
2628 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2629 oprnd1);
2630 return pattern_stmt;
2632 return NULL;
2635 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2637 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2638 if (optab2
2639 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2640 goto use_rotate;
2643 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2644 don't do anything here either. */
2645 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2646 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2647 if (!optab1
2648 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2649 || !optab2
2650 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2652 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2653 return NULL;
2654 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2655 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2656 if (!optab1
2657 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2658 || !optab2
2659 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2660 return NULL;
2663 *type_out = vectype;
2665 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2667 def = vect_recog_temp_ssa_var (type, NULL);
2668 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2669 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2670 oprnd0 = def;
2673 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2674 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2676 def = NULL_TREE;
2677 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2678 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2679 def = oprnd1;
2680 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2682 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2683 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2684 && TYPE_PRECISION (TREE_TYPE (rhs1))
2685 == TYPE_PRECISION (type))
2686 def = rhs1;
2689 if (def == NULL_TREE)
2691 def = vect_recog_temp_ssa_var (type, NULL);
2692 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2693 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2695 stype = TREE_TYPE (def);
2697 if (TREE_CODE (def) == INTEGER_CST)
2699 if (!tree_fits_uhwi_p (def)
2700 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2701 || integer_zerop (def))
2702 return NULL;
2703 def2 = build_int_cst (stype,
2704 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2706 else
2708 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2710 if (vecstype == NULL_TREE)
2711 return NULL;
2712 def2 = vect_recog_temp_ssa_var (stype, NULL);
2713 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2714 if (ext_def)
2716 basic_block new_bb
2717 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2718 gcc_assert (!new_bb);
2720 else
2721 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2723 def2 = vect_recog_temp_ssa_var (stype, NULL);
2724 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2725 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2726 gimple_assign_lhs (def_stmt), mask);
2727 if (ext_def)
2729 basic_block new_bb
2730 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2731 gcc_assert (!new_bb);
2733 else
2734 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2737 var1 = vect_recog_temp_ssa_var (type, NULL);
2738 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2739 ? LSHIFT_EXPR : RSHIFT_EXPR,
2740 oprnd0, def);
2741 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2743 var2 = vect_recog_temp_ssa_var (type, NULL);
2744 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2745 ? RSHIFT_EXPR : LSHIFT_EXPR,
2746 oprnd0, def2);
2747 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2749 /* Pattern detected. */
2750 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2752 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2753 var = vect_recog_temp_ssa_var (type, NULL);
2754 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2756 return pattern_stmt;
2759 /* Detect a vector-by-vector shift pattern that wouldn't otherwise be
2760 vectorized:
2762 type a_t;
2763 TYPE b_T, res_T;
2765 S1 a_t = ;
2766 S2 b_T = ;
2767 S3 res_T = b_T op a_t;
2769 where type 'TYPE' is a type with different size than 'type',
2770 and op is <<, >> or rotate.
2772 Also detect cases:
2774 type a_t;
2775 TYPE b_T, c_T, res_T;
2777 S0 c_T = ;
2778 S1 a_t = (type) c_T;
2779 S2 b_T = ;
2780 S3 res_T = b_T op a_t;
2782 Input/Output:
2784 * STMT_VINFO: The stmt from which the pattern search begins,
2785 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2786 with a shift/rotate which has same type on both operands, in the
2787 second case just b_T op c_T, in the first case with added cast
2788 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2790 Output:
2792 * TYPE_OUT: The type of the output of this pattern.
2794 * Return value: A new stmt that will be used to replace the shift/rotate
2795 S3 stmt. */
2797 static gimple *
2798 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2799 stmt_vec_info stmt_vinfo,
2800 tree *type_out)
2802 gimple *last_stmt = stmt_vinfo->stmt;
2803 tree oprnd0, oprnd1, lhs, var;
2804 gimple *pattern_stmt;
2805 enum tree_code rhs_code;
2807 if (!is_gimple_assign (last_stmt))
2808 return NULL;
2810 rhs_code = gimple_assign_rhs_code (last_stmt);
2811 switch (rhs_code)
2813 case LSHIFT_EXPR:
2814 case RSHIFT_EXPR:
2815 case LROTATE_EXPR:
2816 case RROTATE_EXPR:
2817 break;
2818 default:
2819 return NULL;
2822 lhs = gimple_assign_lhs (last_stmt);
2823 oprnd0 = gimple_assign_rhs1 (last_stmt);
2824 oprnd1 = gimple_assign_rhs2 (last_stmt);
2825 if (TREE_CODE (oprnd0) != SSA_NAME
2826 || TREE_CODE (oprnd1) != SSA_NAME
2827 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2828 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2829 || TYPE_PRECISION (TREE_TYPE (lhs))
2830 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2831 return NULL;
2833 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2834 if (!def_vinfo)
2835 return NULL;
2837 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2838 if (*type_out == NULL_TREE)
2839 return NULL;
2841 tree def = NULL_TREE;
2842 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2843 if (def_stmt && gimple_assign_cast_p (def_stmt))
2845 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2846 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2847 && TYPE_PRECISION (TREE_TYPE (rhs1))
2848 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2850 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2851 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2852 def = rhs1;
2853 else
2855 tree mask
2856 = build_low_bits_mask (TREE_TYPE (rhs1),
2857 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2858 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2859 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2860 tree vecstype = get_vectype_for_scalar_type (vinfo,
2861 TREE_TYPE (rhs1));
2862 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2867 if (def == NULL_TREE)
2869 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2870 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2871 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2874 /* Pattern detected. */
2875 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2877 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2878 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2879 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2881 return pattern_stmt;
2884 /* Return true iff the target has a vector optab implementing the operation
2885 CODE on type VECTYPE. */
2887 static bool
2888 target_has_vecop_for_code (tree_code code, tree vectype)
2890 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2891 return voptab
2892 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2895 /* Verify that the target has optabs of VECTYPE to perform all the steps
2896 needed by the multiplication-by-immediate synthesis algorithm described by
2897 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2898 present. Return true iff the target supports all the steps. */
2900 static bool
2901 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2902 tree vectype, bool synth_shift_p)
2904 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2905 return false;
2907 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2908 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2910 if (var == negate_variant
2911 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2912 return false;
2914 /* If we must synthesize shifts with additions make sure that vector
2915 addition is available. */
2916 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2917 return false;
2919 for (int i = 1; i < alg->ops; i++)
2921 switch (alg->op[i])
2923 case alg_shift:
2924 break;
2925 case alg_add_t_m2:
2926 case alg_add_t2_m:
2927 case alg_add_factor:
2928 if (!supports_vplus)
2929 return false;
2930 break;
2931 case alg_sub_t_m2:
2932 case alg_sub_t2_m:
2933 case alg_sub_factor:
2934 if (!supports_vminus)
2935 return false;
2936 break;
2937 case alg_unknown:
2938 case alg_m:
2939 case alg_zero:
2940 case alg_impossible:
2941 return false;
2942 default:
2943 gcc_unreachable ();
2947 return true;
2950 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
2951 putting the final result in DEST. Append all statements but the last to
2952 STMT_INFO's pattern definition sequence. Return the last statement. */
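/* For example (illustrative): a shift by 3 becomes three self-additions,
   t1 = op + op; t2 = t1 + t1; dest = t2 + t2, i.e. AMNT additions in
   total, with all but the last appended to the pattern def sequence.  */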
2954 static gimple *
2955 synth_lshift_by_additions (vec_info *vinfo,
2956 tree dest, tree op, HOST_WIDE_INT amnt,
2957 stmt_vec_info stmt_info)
2959 HOST_WIDE_INT i;
2960 tree itype = TREE_TYPE (op);
2961 tree prev_res = op;
2962 gcc_assert (amnt >= 0);
2963 for (i = 0; i < amnt; i++)
2965 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2966 : dest;
2967 gimple *stmt
2968 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2969 prev_res = tmp_var;
2970 if (i < amnt - 1)
2971 append_pattern_def_seq (vinfo, stmt_info, stmt);
2972 else
2973 return stmt;
2975 gcc_unreachable ();
2976 return NULL;
2979 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
2980 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2981 the process if necessary. Append the resulting assignment statements
2982 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2983 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2984 left shifts using additions. */
2986 static tree
2987 apply_binop_and_append_stmt (vec_info *vinfo,
2988 tree_code code, tree op1, tree op2,
2989 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2991 if (integer_zerop (op2)
2992 && (code == LSHIFT_EXPR
2993 || code == PLUS_EXPR))
2995 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2996 return op1;
2999 gimple *stmt;
3000 tree itype = TREE_TYPE (op1);
3001 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3003 if (code == LSHIFT_EXPR
3004 && synth_shift_p)
3006 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3007 TREE_INT_CST_LOW (op2), stmt_vinfo);
3008 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3009 return tmp_var;
3012 stmt = gimple_build_assign (tmp_var, code, op1, op2);
3013 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3014 return tmp_var;
3017 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3018 and simple arithmetic operations to be vectorized. Record the statements
3019 produced in STMT_VINFO and return the last statement in the sequence or
3020 NULL if it's not possible to synthesize such a multiplication.
3021 This function mirrors the behavior of expand_mult_const in expmed.c but
3022 works on tree-ssa form. */
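/* For instance (illustrative), multiplying by 9 can be synthesized as
   t1 = op << 3; result = t1 + op (an alg_add_t_m2 step with log 3), and
   on a target without vector shifts the "op << 3" step itself is
   expanded into three additions by synth_lshift_by_additions.  */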
3024 static gimple *
3025 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3026 stmt_vec_info stmt_vinfo)
3028 tree itype = TREE_TYPE (op);
3029 machine_mode mode = TYPE_MODE (itype);
3030 struct algorithm alg;
3031 mult_variant variant;
3032 if (!tree_fits_shwi_p (val))
3033 return NULL;
3035 /* Multiplication synthesis by shifts, adds and subs can introduce
3036 signed overflow where the original operation didn't. Perform the
3037 operations on an unsigned type and cast back to avoid this.
3038 In the future we may want to relax this for synthesis algorithms
3039 that we can prove do not cause unexpected overflow. */
3040 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3042 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3044 /* Targets that don't support vector shifts but support vector additions
3045 can synthesize shifts that way. */
3046 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3048 HOST_WIDE_INT hwval = tree_to_shwi (val);
3049 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3050 The vectorizer's benefit analysis will decide whether it's beneficial
3051 to do this. */
3052 bool possible = choose_mult_variant (mode, hwval, &alg,
3053 &variant, MAX_COST);
3054 if (!possible)
3055 return NULL;
3057 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3059 if (!vectype
3060 || !target_supports_mult_synth_alg (&alg, variant,
3061 vectype, synth_shift_p))
3062 return NULL;
3064 tree accumulator;
3066 /* Clear out the sequence of statements so we can populate it below. */
3067 gimple *stmt = NULL;
3069 if (cast_to_unsigned_p)
3071 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3072 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3073 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3074 op = tmp_op;
3077 if (alg.op[0] == alg_zero)
3078 accumulator = build_int_cst (multtype, 0);
3079 else
3080 accumulator = op;
3082 bool needs_fixup = (variant == negate_variant)
3083 || (variant == add_variant);
3085 for (int i = 1; i < alg.ops; i++)
3087 tree shft_log = build_int_cst (multtype, alg.log[i]);
3088 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3089 tree tmp_var = NULL_TREE;
3091 switch (alg.op[i])
3093 case alg_shift:
3094 if (synth_shift_p)
3095 stmt
3096 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3097 alg.log[i], stmt_vinfo);
3098 else
3099 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3100 shft_log);
3101 break;
3102 case alg_add_t_m2:
3103 tmp_var
3104 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3105 stmt_vinfo, synth_shift_p);
3106 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3107 tmp_var);
3108 break;
3109 case alg_sub_t_m2:
3110 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3111 shft_log, stmt_vinfo,
3112 synth_shift_p);
3113 /* In some algorithms the first step involves zeroing the
3114 accumulator. If subtracting from such an accumulator
3115 just emit the negation directly. */
3116 if (integer_zerop (accumulator))
3117 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3118 else
3119 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3120 tmp_var);
3121 break;
3122 case alg_add_t2_m:
3123 tmp_var
3124 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3125 shft_log, stmt_vinfo, synth_shift_p);
3126 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3127 break;
3128 case alg_sub_t2_m:
3129 tmp_var
3130 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3131 shft_log, stmt_vinfo, synth_shift_p);
3132 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3133 break;
3134 case alg_add_factor:
3135 tmp_var
3136 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3137 shft_log, stmt_vinfo, synth_shift_p);
3138 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3139 tmp_var);
3140 break;
3141 case alg_sub_factor:
3142 tmp_var
3143 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3144 shft_log, stmt_vinfo, synth_shift_p);
3145 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3146 accumulator);
3147 break;
3148 default:
3149 gcc_unreachable ();
3151 /* We don't want to append the last stmt in the sequence to stmt_vinfo
3152 but rather return it directly. */
3154 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3155 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3156 accumulator = accum_tmp;
3158 if (variant == negate_variant)
3160 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3161 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3162 accumulator = accum_tmp;
3163 if (cast_to_unsigned_p)
3164 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3166 else if (variant == add_variant)
3168 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3169 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3170 accumulator = accum_tmp;
3171 if (cast_to_unsigned_p)
3172 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3174 /* Move back to a signed type if needed. */
3175 if (cast_to_unsigned_p)
3177 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3178 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3181 return stmt;
3184 /* Detect multiplication by a constant and convert it into a sequence of
3185 shifts, additions, subtractions and negations. We reuse the
3186 choose_mult_variant algorithms from expmed.c
3188 Input/Output:
3190 STMT_VINFO: The stmt from which the pattern search begins,
3191 i.e. the mult stmt.
3193 Output:
3195 * TYPE_OUT: The type of the output of this pattern.
3197 * Return value: A new stmt that will be used to replace
3198 the multiplication. */
3200 static gimple *
3201 vect_recog_mult_pattern (vec_info *vinfo,
3202 stmt_vec_info stmt_vinfo, tree *type_out)
3204 gimple *last_stmt = stmt_vinfo->stmt;
3205 tree oprnd0, oprnd1, vectype, itype;
3206 gimple *pattern_stmt;
3208 if (!is_gimple_assign (last_stmt))
3209 return NULL;
3211 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3212 return NULL;
3214 oprnd0 = gimple_assign_rhs1 (last_stmt);
3215 oprnd1 = gimple_assign_rhs2 (last_stmt);
3216 itype = TREE_TYPE (oprnd0);
3218 if (TREE_CODE (oprnd0) != SSA_NAME
3219 || TREE_CODE (oprnd1) != INTEGER_CST
3220 || !INTEGRAL_TYPE_P (itype)
3221 || !type_has_mode_precision_p (itype))
3222 return NULL;
3224 vectype = get_vectype_for_scalar_type (vinfo, itype);
3225 if (vectype == NULL_TREE)
3226 return NULL;
3228 /* If the target can handle vectorized multiplication natively,
3229 don't attempt to optimize this. */
3230 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3231 if (mul_optab != unknown_optab)
3233 machine_mode vec_mode = TYPE_MODE (vectype);
3234 int icode = (int) optab_handler (mul_optab, vec_mode);
3235 if (icode != CODE_FOR_nothing)
3236 return NULL;
3239 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3240 oprnd0, oprnd1, stmt_vinfo);
3241 if (!pattern_stmt)
3242 return NULL;
3244 /* Pattern detected. */
3245 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3247 *type_out = vectype;
3249 return pattern_stmt;
3252 /* Detect a signed division by a constant that wouldn't be
3253 otherwise vectorized:
3255 type a_t, b_t;
3257 S1 a_t = b_t / N;
3259 where type 'type' is an integral type and N is a constant.
3261 Similarly handle modulo by a constant:
3263 S4 a_t = b_t % N;
3265 Input/Output:
3267 * STMT_VINFO: The stmt from which the pattern search begins,
3268 i.e. the division stmt. S1 is replaced, if N is a power of two
3269 constant and type is signed, by:
3270 S3 y_t = b_t < 0 ? N - 1 : 0;
3271 S2 x_t = b_t + y_t;
3272 S1' a_t = x_t >> log2 (N);
3274 S4 is replaced, if N is a power of two constant and type is
3275 signed, by the following (where *_T temporaries have unsigned type):
3276 S9 y_T = b_t < 0 ? -1U : 0U;
3277 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3278 S7 z_t = (type) z_T;
3279 S6 w_t = b_t + z_t;
3280 S5 x_t = w_t & (N - 1);
3281 S4' a_t = x_t - z_t;
3283 Output:
3285 * TYPE_OUT: The type of the output of this pattern.
3287 * Return value: A new stmt that will be used to replace the division
3288 S1 or modulo S4 stmt. */
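/* Worked example (illustrative): for "a_t = b_t / 4" with a signed 32-bit
   type, the replacement is a_t = (b_t + (b_t < 0 ? 3 : 0)) >> 2; e.g. for
   b_t = -7 this gives (-7 + 3) >> 2 = -1, matching C's truncating
   division, whereas a plain arithmetic shift would give -2.  */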
3290 static gimple *
3291 vect_recog_divmod_pattern (vec_info *vinfo,
3292 stmt_vec_info stmt_vinfo, tree *type_out)
3294 gimple *last_stmt = stmt_vinfo->stmt;
3295 tree oprnd0, oprnd1, vectype, itype, cond;
3296 gimple *pattern_stmt, *def_stmt;
3297 enum tree_code rhs_code;
3298 optab optab;
3299 tree q;
3300 int dummy_int, prec;
3302 if (!is_gimple_assign (last_stmt))
3303 return NULL;
3305 rhs_code = gimple_assign_rhs_code (last_stmt);
3306 switch (rhs_code)
3308 case TRUNC_DIV_EXPR:
3309 case EXACT_DIV_EXPR:
3310 case TRUNC_MOD_EXPR:
3311 break;
3312 default:
3313 return NULL;
3316 oprnd0 = gimple_assign_rhs1 (last_stmt);
3317 oprnd1 = gimple_assign_rhs2 (last_stmt);
3318 itype = TREE_TYPE (oprnd0);
3319 if (TREE_CODE (oprnd0) != SSA_NAME
3320 || TREE_CODE (oprnd1) != INTEGER_CST
3321 || TREE_CODE (itype) != INTEGER_TYPE
3322 || !type_has_mode_precision_p (itype))
3323 return NULL;
3325 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3326 vectype = get_vectype_for_scalar_type (vinfo, itype);
3327 if (vectype == NULL_TREE)
3328 return NULL;
3330 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3332 /* If the target can handle vectorized division or modulo natively,
3333 don't attempt to optimize this, since native division is likely
3334 to give smaller code. */
3335 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3336 if (optab != unknown_optab)
3338 machine_mode vec_mode = TYPE_MODE (vectype);
3339 int icode = (int) optab_handler (optab, vec_mode);
3340 if (icode != CODE_FOR_nothing)
3341 return NULL;
3345 prec = TYPE_PRECISION (itype);
3346 if (integer_pow2p (oprnd1))
3348 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3349 return NULL;
3351 /* Pattern detected. */
3352 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3354 *type_out = vectype;
3356 /* Check if the target supports this internal function. */
3357 internal_fn ifn = IFN_DIV_POW2;
3358 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3360 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3362 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3363 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3364 gimple_call_set_lhs (div_stmt, var_div);
3366 if (rhs_code == TRUNC_MOD_EXPR)
3368 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3369 def_stmt
3370 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3371 LSHIFT_EXPR, var_div, shift);
3372 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3373 pattern_stmt
3374 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3375 MINUS_EXPR, oprnd0,
3376 gimple_assign_lhs (def_stmt));
3378 else
3379 pattern_stmt = div_stmt;
3380 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3382 return pattern_stmt;
3385 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3386 build_int_cst (itype, 0));
3387 if (rhs_code == TRUNC_DIV_EXPR
3388 || rhs_code == EXACT_DIV_EXPR)
3390 tree var = vect_recog_temp_ssa_var (itype, NULL);
3391 tree shift;
3392 def_stmt
3393 = gimple_build_assign (var, COND_EXPR, cond,
3394 fold_build2 (MINUS_EXPR, itype, oprnd1,
3395 build_int_cst (itype, 1)),
3396 build_int_cst (itype, 0));
3397 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3398 var = vect_recog_temp_ssa_var (itype, NULL);
3399 def_stmt
3400 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3401 gimple_assign_lhs (def_stmt));
3402 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3404 shift = build_int_cst (itype, tree_log2 (oprnd1));
3405 pattern_stmt
3406 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3407 RSHIFT_EXPR, var, shift);
3409 else
3411 tree signmask;
3412 if (compare_tree_int (oprnd1, 2) == 0)
3414 signmask = vect_recog_temp_ssa_var (itype, NULL);
3415 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3416 build_int_cst (itype, 1),
3417 build_int_cst (itype, 0));
3418 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3420 else
3422 tree utype
3423 = build_nonstandard_integer_type (prec, 1);
3424 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3425 tree shift
3426 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3427 - tree_log2 (oprnd1));
3428 tree var = vect_recog_temp_ssa_var (utype, NULL);
3430 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3431 build_int_cst (utype, -1),
3432 build_int_cst (utype, 0));
3433 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3434 var = vect_recog_temp_ssa_var (utype, NULL);
3435 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3436 gimple_assign_lhs (def_stmt),
3437 shift);
3438 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3439 signmask = vect_recog_temp_ssa_var (itype, NULL);
3440 def_stmt
3441 = gimple_build_assign (signmask, NOP_EXPR, var);
3442 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3444 def_stmt
3445 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3446 PLUS_EXPR, oprnd0, signmask);
3447 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3448 def_stmt
3449 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3450 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3451 fold_build2 (MINUS_EXPR, itype, oprnd1,
3452 build_int_cst (itype, 1)));
3453 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3455 pattern_stmt
3456 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3457 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3458 signmask);
3461 return pattern_stmt;
3464 if (prec > HOST_BITS_PER_WIDE_INT
3465 || integer_zerop (oprnd1))
3466 return NULL;
3468 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3469 return NULL;
3471 if (TYPE_UNSIGNED (itype))
3473 unsigned HOST_WIDE_INT mh, ml;
3474 int pre_shift, post_shift;
3475 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3476 & GET_MODE_MASK (itype_mode));
3477 tree t1, t2, t3, t4;
3479 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3480 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3481 return NULL;
3483 /* Find a suitable multiplier and right shift count
3484 instead of multiplying with D. */
3485 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3487 /* If the suggested multiplier is more than SIZE bits, we can do better
3488 for even divisors, using an initial right shift. */
3489 if (mh != 0 && (d & 1) == 0)
3491 pre_shift = ctz_or_zero (d);
3492 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3493 &ml, &post_shift, &dummy_int);
3494 gcc_assert (!mh);
3496 else
3497 pre_shift = 0;
3499 if (mh != 0)
3501 if (post_shift - 1 >= prec)
3502 return NULL;
3504 /* t1 = oprnd0 h* ml;
3505 t2 = oprnd0 - t1;
3506 t3 = t2 >> 1;
3507 t4 = t1 + t3;
3508 q = t4 >> (post_shift - 1); */
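/* As a worked illustration (assuming a 32-bit unsigned type; the exact
   constants come from choose_multiplier): dividing by 7 needs a 33-bit
   multiplier, so mh != 0, ml = 0x24924925 and post_shift = 3:

     t1 = oprnd0 h* 0x24924925;
     t2 = oprnd0 - t1;
     t3 = t2 >> 1;
     t4 = t1 + t3;
     q  = t4 >> 2;

   e.g. oprnd0 = 14 gives t1 = 2, t2 = 12, t3 = 6, t4 = 8, q = 2.  */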
3509 t1 = vect_recog_temp_ssa_var (itype, NULL);
3510 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3511 build_int_cst (itype, ml));
3512 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3514 t2 = vect_recog_temp_ssa_var (itype, NULL);
3515 def_stmt
3516 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3517 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3519 t3 = vect_recog_temp_ssa_var (itype, NULL);
3520 def_stmt
3521 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3522 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3524 t4 = vect_recog_temp_ssa_var (itype, NULL);
3525 def_stmt
3526 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3528 if (post_shift != 1)
3530 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3532 q = vect_recog_temp_ssa_var (itype, NULL);
3533 pattern_stmt
3534 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3535 build_int_cst (itype, post_shift - 1));
3537 else
3539 q = t4;
3540 pattern_stmt = def_stmt;
3543 else
3545 if (pre_shift >= prec || post_shift >= prec)
3546 return NULL;
3548 /* t1 = oprnd0 >> pre_shift;
3549 t2 = t1 h* ml;
3550 q = t2 >> post_shift; */
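/* Illustrative example (assuming a 32-bit unsigned type): for division
   by 6 the even divisor is handled with pre_shift = 1 and the usual
   multiplier for 3, roughly

     t1 = oprnd0 >> 1;
     t2 = t1 h* 0xAAAAAAAB;
     q  = t2 >> 1;

   e.g. oprnd0 = 12 gives t1 = 6, t2 = 4, q = 2.  */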
3551 if (pre_shift)
3553 t1 = vect_recog_temp_ssa_var (itype, NULL);
3554 def_stmt
3555 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3556 build_int_cst (NULL, pre_shift));
3557 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3559 else
3560 t1 = oprnd0;
3562 t2 = vect_recog_temp_ssa_var (itype, NULL);
3563 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3564 build_int_cst (itype, ml));
3566 if (post_shift)
3568 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3570 q = vect_recog_temp_ssa_var (itype, NULL);
3571 def_stmt
3572 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3573 build_int_cst (itype, post_shift));
3575 else
3576 q = t2;
3578 pattern_stmt = def_stmt;
3581 else
3583 unsigned HOST_WIDE_INT ml;
3584 int post_shift;
3585 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3586 unsigned HOST_WIDE_INT abs_d;
3587 bool add = false;
3588 tree t1, t2, t3, t4;
3590 /* Give up for -1. */
3591 if (d == -1)
3592 return NULL;
3594 /* Since d might be INT_MIN, we have to cast to
3595 unsigned HOST_WIDE_INT before negating to avoid
3596 undefined signed overflow. */
3597 abs_d = (d >= 0
3598 ? (unsigned HOST_WIDE_INT) d
3599 : - (unsigned HOST_WIDE_INT) d);
3601 /* n rem d = n rem -d */
3602 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3604 d = abs_d;
3605 oprnd1 = build_int_cst (itype, abs_d);
3607 if (HOST_BITS_PER_WIDE_INT >= prec
3608 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3609 /* This case is not handled correctly below. */
3610 return NULL;
3612 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3613 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3615 add = true;
3616 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3618 if (post_shift >= prec)
3619 return NULL;
3621 /* t1 = oprnd0 h* ml; */
3622 t1 = vect_recog_temp_ssa_var (itype, NULL);
3623 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3624 build_int_cst (itype, ml));
3626 if (add)
3628 /* t2 = t1 + oprnd0; */
3629 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3630 t2 = vect_recog_temp_ssa_var (itype, NULL);
3631 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3633 else
3634 t2 = t1;
3636 if (post_shift)
3638 /* t3 = t2 >> post_shift; */
3639 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3640 t3 = vect_recog_temp_ssa_var (itype, NULL);
3641 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3642 build_int_cst (itype, post_shift));
3644 else
3645 t3 = t2;
3647 int msb = 1;
3648 value_range r;
3649 get_range_query (cfun)->range_of_expr (r, oprnd0);
3650 if (r.kind () == VR_RANGE)
3652 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
3653 msb = 0;
3654 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
3655 msb = -1;
3658 if (msb == 0 && d >= 0)
3660 /* q = t3; */
3661 q = t3;
3662 pattern_stmt = def_stmt;
3664 else
3666 /* t4 = oprnd0 >> (prec - 1);
3667 or if we know from VRP that oprnd0 >= 0
3668 t4 = 0;
3669 or if we know from VRP that oprnd0 < 0
3670 t4 = -1; */
3671 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3672 t4 = vect_recog_temp_ssa_var (itype, NULL);
3673 if (msb != 1)
3674 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3675 build_int_cst (itype, msb));
3676 else
3677 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3678 build_int_cst (itype, prec - 1));
3679 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3681 /* q = t3 - t4; or q = t4 - t3; */
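/* As a sketch (assuming a 32-bit signed type, constants again from
   choose_multiplier): dividing by 3 uses ml = 0x55555556 with
   post_shift = 0, so t3 = oprnd0 h* 0x55555556 and t4 is the sign bit
   broadcast (oprnd0 >> 31, i.e. 0 or -1).  q = t3 - t4 then adds the +1
   correction needed for negative dividends, e.g. oprnd0 = -7 gives
   t3 = -3, t4 = -1, q = -2.  */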
3682 q = vect_recog_temp_ssa_var (itype, NULL);
3683 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3684 d < 0 ? t3 : t4);
3688 if (rhs_code == TRUNC_MOD_EXPR)
3690 tree r, t1;
3692 /* We divided. Now finish by:
3693 t1 = q * oprnd1;
3694 r = oprnd0 - t1; */
3695 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3697 t1 = vect_recog_temp_ssa_var (itype, NULL);
3698 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3699 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3701 r = vect_recog_temp_ssa_var (itype, NULL);
3702 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3705 /* Pattern detected. */
3706 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3708 *type_out = vectype;
3709 return pattern_stmt;
3712 /* Function vect_recog_mixed_size_cond_pattern
3714 Try to find the following pattern:
3716 type x_t, y_t;
3717 TYPE a_T, b_T, c_T;
3718 loop:
3719 S1 a_T = x_t CMP y_t ? b_T : c_T;
 3721 where type 'TYPE' is an integral type which has a different size
3722 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3723 than 'type', the constants need to fit into an integer type
3724 with the same width as 'type') or results of conversion from 'type'.
3726 Input:
3728 * STMT_VINFO: The stmt from which the pattern search begins.
3730 Output:
3732 * TYPE_OUT: The type of the output of this pattern.
3734 * Return value: A new stmt that will be used to replace the pattern.
3735 Additionally a def_stmt is added.
3737 a_it = x_t CMP y_t ? b_it : c_it;
3738 a_T = (TYPE) a_it; */
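/* For instance (an illustrative case only): with

     short x_t, y_t;  int a_T;
     a_T = x_t < y_t ? 17 : -32;

   the pattern introduces a 16-bit intermediate

     a_it = x_t < y_t ? 17 : -32;
     a_T = (int) a_it;

   so that the COND_EXPR can use the same vector shape as the comparison
   operands.  */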
3740 static gimple *
3741 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3742 stmt_vec_info stmt_vinfo, tree *type_out)
3744 gimple *last_stmt = stmt_vinfo->stmt;
3745 tree cond_expr, then_clause, else_clause;
3746 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3747 gimple *pattern_stmt, *def_stmt;
3748 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3749 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3750 bool promotion;
3751 tree comp_scalar_type;
3753 if (!is_gimple_assign (last_stmt)
3754 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3755 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3756 return NULL;
3758 cond_expr = gimple_assign_rhs1 (last_stmt);
3759 then_clause = gimple_assign_rhs2 (last_stmt);
3760 else_clause = gimple_assign_rhs3 (last_stmt);
3762 if (!COMPARISON_CLASS_P (cond_expr))
3763 return NULL;
3765 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3766 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3767 if (comp_vectype == NULL_TREE)
3768 return NULL;
3770 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3771 if (types_compatible_p (type, comp_scalar_type)
3772 || ((TREE_CODE (then_clause) != INTEGER_CST
3773 || TREE_CODE (else_clause) != INTEGER_CST)
3774 && !INTEGRAL_TYPE_P (comp_scalar_type))
3775 || !INTEGRAL_TYPE_P (type))
3776 return NULL;
3778 if ((TREE_CODE (then_clause) != INTEGER_CST
3779 && !type_conversion_p (vinfo, then_clause, false,
3780 &orig_type0, &def_stmt0, &promotion))
3781 || (TREE_CODE (else_clause) != INTEGER_CST
3782 && !type_conversion_p (vinfo, else_clause, false,
3783 &orig_type1, &def_stmt1, &promotion)))
3784 return NULL;
3786 if (orig_type0 && orig_type1
3787 && !types_compatible_p (orig_type0, orig_type1))
3788 return NULL;
3790 if (orig_type0)
3792 if (!types_compatible_p (orig_type0, comp_scalar_type))
3793 return NULL;
3794 then_clause = gimple_assign_rhs1 (def_stmt0);
3795 itype = orig_type0;
3798 if (orig_type1)
3800 if (!types_compatible_p (orig_type1, comp_scalar_type))
3801 return NULL;
3802 else_clause = gimple_assign_rhs1 (def_stmt1);
3803 itype = orig_type1;
3807 HOST_WIDE_INT cmp_mode_size
3808 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3810 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3811 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3812 return NULL;
3814 vectype = get_vectype_for_scalar_type (vinfo, type);
3815 if (vectype == NULL_TREE)
3816 return NULL;
3818 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3819 return NULL;
3821 if (itype == NULL_TREE)
3822 itype = build_nonstandard_integer_type (cmp_mode_size,
3823 TYPE_UNSIGNED (type));
3825 if (itype == NULL_TREE
3826 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3827 return NULL;
3829 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3830 if (vecitype == NULL_TREE)
3831 return NULL;
3833 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3834 return NULL;
3836 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3838 if ((TREE_CODE (then_clause) == INTEGER_CST
3839 && !int_fits_type_p (then_clause, itype))
3840 || (TREE_CODE (else_clause) == INTEGER_CST
3841 && !int_fits_type_p (else_clause, itype)))
3842 return NULL;
3845 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3846 COND_EXPR, unshare_expr (cond_expr),
3847 fold_convert (itype, then_clause),
3848 fold_convert (itype, else_clause));
3849 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3850 NOP_EXPR, gimple_assign_lhs (def_stmt));
3852 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3853 *type_out = vectype;
3855 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3857 return pattern_stmt;
3861 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3862 true if bool VAR can and should be optimized that way. Assume it shouldn't
3863 in case it's a result of a comparison which can be directly vectorized into
3864 a vector comparison. Fills in STMTS with all stmts visited during the
3865 walk. */
3867 static bool
3868 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3870 tree rhs1;
3871 enum tree_code rhs_code;
3873 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3874 if (!def_stmt_info)
3875 return false;
3877 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3878 if (!def_stmt)
3879 return false;
3881 if (stmts.contains (def_stmt))
3882 return true;
3884 rhs1 = gimple_assign_rhs1 (def_stmt);
3885 rhs_code = gimple_assign_rhs_code (def_stmt);
3886 switch (rhs_code)
3888 case SSA_NAME:
3889 if (! check_bool_pattern (rhs1, vinfo, stmts))
3890 return false;
3891 break;
3893 CASE_CONVERT:
3894 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3895 return false;
3896 if (! check_bool_pattern (rhs1, vinfo, stmts))
3897 return false;
3898 break;
3900 case BIT_NOT_EXPR:
3901 if (! check_bool_pattern (rhs1, vinfo, stmts))
3902 return false;
3903 break;
3905 case BIT_AND_EXPR:
3906 case BIT_IOR_EXPR:
3907 case BIT_XOR_EXPR:
3908 if (! check_bool_pattern (rhs1, vinfo, stmts)
3909 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3910 return false;
3911 break;
3913 default:
3914 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3916 tree vecitype, comp_vectype;
3918 /* If the comparison can throw, then is_gimple_condexpr will be
3919 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3920 if (stmt_could_throw_p (cfun, def_stmt))
3921 return false;
3923 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
3924 if (comp_vectype == NULL_TREE)
3925 return false;
3927 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3928 TREE_TYPE (rhs1));
3929 if (mask_type
3930 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3931 return false;
3933 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3935 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3936 tree itype
3937 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3938 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3939 if (vecitype == NULL_TREE)
3940 return false;
3942 else
3943 vecitype = comp_vectype;
3944 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3945 return false;
3947 else
3948 return false;
3949 break;
3952 bool res = stmts.add (def_stmt);
3953 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3954 gcc_assert (!res);
3956 return true;
 3960 /* Helper function of adjust_bool_pattern.  Cast VAR (the result of a
 3961 previous stmt, its SSA_NAME_DEF_STMT) to TYPE, appending the cast
 3962 stmt to STMT_INFO's pattern sequence and returning its result.  */
3964 static tree
3965 adjust_bool_pattern_cast (vec_info *vinfo,
3966 tree type, tree var, stmt_vec_info stmt_info)
3968 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3969 NOP_EXPR, var);
3970 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
3971 get_vectype_for_scalar_type (vinfo, type));
3972 return gimple_assign_lhs (cast_stmt);
3975 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3976 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3977 type, OUT_TYPE is the desired final integer type of the whole pattern.
3978 STMT_INFO is the info of the pattern root and is where pattern stmts should
3979 be associated with. DEFS is a map of pattern defs. */
3981 static void
3982 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
3983 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3985 gimple *stmt = SSA_NAME_DEF_STMT (var);
3986 enum tree_code rhs_code, def_rhs_code;
3987 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3988 location_t loc;
3989 gimple *pattern_stmt, *def_stmt;
3990 tree trueval = NULL_TREE;
3992 rhs1 = gimple_assign_rhs1 (stmt);
3993 rhs2 = gimple_assign_rhs2 (stmt);
3994 rhs_code = gimple_assign_rhs_code (stmt);
3995 loc = gimple_location (stmt);
3996 switch (rhs_code)
3998 case SSA_NAME:
3999 CASE_CONVERT:
4000 irhs1 = *defs.get (rhs1);
4001 itype = TREE_TYPE (irhs1);
4002 pattern_stmt
4003 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4004 SSA_NAME, irhs1);
4005 break;
4007 case BIT_NOT_EXPR:
4008 irhs1 = *defs.get (rhs1);
4009 itype = TREE_TYPE (irhs1);
4010 pattern_stmt
4011 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4012 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
4013 break;
4015 case BIT_AND_EXPR:
4016 /* Try to optimize x = y & (a < b ? 1 : 0); into
4017 x = (a < b ? y : 0);
4019 E.g. for:
4020 bool a_b, b_b, c_b;
4021 TYPE d_T;
4023 S1 a_b = x1 CMP1 y1;
4024 S2 b_b = x2 CMP2 y2;
4025 S3 c_b = a_b & b_b;
4026 S4 d_T = (TYPE) c_b;
4028 we would normally emit:
4030 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4031 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4032 S3' c_T = a_T & b_T;
4033 S4' d_T = c_T;
4035 but we can save one stmt by using the
4036 result of one of the COND_EXPRs in the other COND_EXPR and leave
4037 BIT_AND_EXPR stmt out:
4039 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4040 S3' c_T = x2 CMP2 y2 ? a_T : 0;
 4041 S4' d_T = c_T;
 4043 At least when VEC_COND_EXPR is implemented using masks,
 4044 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
 4045 computes the comparison mask and ANDs it, in one case with an
 4046 all-ones vector, in the other case with a vector register.
4047 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
4048 often more expensive. */
4049 def_stmt = SSA_NAME_DEF_STMT (rhs2);
4050 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4051 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4053 irhs1 = *defs.get (rhs1);
4054 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4055 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4056 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4058 rhs_code = def_rhs_code;
4059 rhs1 = def_rhs1;
4060 rhs2 = gimple_assign_rhs2 (def_stmt);
4061 trueval = irhs1;
4062 goto do_compare;
4064 else
4065 irhs2 = *defs.get (rhs2);
4066 goto and_ior_xor;
4068 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4069 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4070 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4072 irhs2 = *defs.get (rhs2);
4073 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4074 if (TYPE_PRECISION (TREE_TYPE (irhs2))
4075 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4077 rhs_code = def_rhs_code;
4078 rhs1 = def_rhs1;
4079 rhs2 = gimple_assign_rhs2 (def_stmt);
4080 trueval = irhs2;
4081 goto do_compare;
4083 else
4084 irhs1 = *defs.get (rhs1);
4085 goto and_ior_xor;
4087 /* FALLTHRU */
4088 case BIT_IOR_EXPR:
4089 case BIT_XOR_EXPR:
4090 irhs1 = *defs.get (rhs1);
4091 irhs2 = *defs.get (rhs2);
4092 and_ior_xor:
4093 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4094 != TYPE_PRECISION (TREE_TYPE (irhs2)))
4096 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
4097 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
4098 int out_prec = TYPE_PRECISION (out_type);
4099 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
4100 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
4101 stmt_info);
4102 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
4103 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
4104 stmt_info);
4105 else
4107 irhs1 = adjust_bool_pattern_cast (vinfo,
4108 out_type, irhs1, stmt_info);
4109 irhs2 = adjust_bool_pattern_cast (vinfo,
4110 out_type, irhs2, stmt_info);
4113 itype = TREE_TYPE (irhs1);
4114 pattern_stmt
4115 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4116 rhs_code, irhs1, irhs2);
4117 break;
4119 default:
4120 do_compare:
4121 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
4122 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
4123 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
4124 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
4125 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
4127 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4128 itype
4129 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4131 else
4132 itype = TREE_TYPE (rhs1);
4133 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
4134 if (trueval == NULL_TREE)
4135 trueval = build_int_cst (itype, 1);
4136 else
4137 gcc_checking_assert (useless_type_conversion_p (itype,
4138 TREE_TYPE (trueval)));
4139 pattern_stmt
4140 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4141 COND_EXPR, cond_expr, trueval,
4142 build_int_cst (itype, 0));
4143 break;
4146 gimple_set_location (pattern_stmt, loc);
4147 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
4148 get_vectype_for_scalar_type (vinfo, itype));
4149 defs.put (var, gimple_assign_lhs (pattern_stmt));
 4152 /* Comparison function to qsort a vector of gimple stmts by UID.  */
4154 static int
4155 sort_after_uid (const void *p1, const void *p2)
4157 const gimple *stmt1 = *(const gimple * const *)p1;
4158 const gimple *stmt2 = *(const gimple * const *)p2;
4159 return gimple_uid (stmt1) - gimple_uid (stmt2);
4162 /* Create pattern stmts for all stmts participating in the bool pattern
4163 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4164 OUT_TYPE. Return the def of the pattern root. */
4166 static tree
4167 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4168 tree out_type, stmt_vec_info stmt_info)
4170 /* Gather original stmts in the bool pattern in their order of appearance
4171 in the IL. */
4172 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4173 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4174 i != bool_stmt_set.end (); ++i)
4175 bool_stmts.quick_push (*i);
4176 bool_stmts.qsort (sort_after_uid);
4178 /* Now process them in that order, producing pattern stmts. */
4179 hash_map <tree, tree> defs;
4180 for (unsigned i = 0; i < bool_stmts.length (); ++i)
4181 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4182 out_type, stmt_info, defs);
4184 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
4185 gimple *pattern_stmt
4186 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4187 return gimple_assign_lhs (pattern_stmt);
4190 /* Return the proper type for converting bool VAR into
4191 an integer value or NULL_TREE if no such type exists.
4192 The type is chosen so that the converted value has the
4193 same number of elements as VAR's vector type. */
4195 static tree
4196 integer_type_for_mask (tree var, vec_info *vinfo)
4198 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4199 return NULL_TREE;
4201 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4202 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4203 return NULL_TREE;
4205 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4208 /* Function vect_recog_bool_pattern
 4210 Try to find a pattern like the following:
4212 bool a_b, b_b, c_b, d_b, e_b;
4213 TYPE f_T;
4214 loop:
4215 S1 a_b = x1 CMP1 y1;
4216 S2 b_b = x2 CMP2 y2;
4217 S3 c_b = a_b & b_b;
4218 S4 d_b = x3 CMP3 y3;
4219 S5 e_b = c_b | d_b;
4220 S6 f_T = (TYPE) e_b;
4222 where type 'TYPE' is an integral type. Or a similar pattern
4223 ending in
4225 S6 f_Y = e_b ? r_Y : s_Y;
4227 as results from if-conversion of a complex condition.
4229 Input:
4231 * STMT_VINFO: The stmt at the end from which the pattern
4232 search begins, i.e. cast of a bool to
4233 an integer type.
4235 Output:
4237 * TYPE_OUT: The type of the output of this pattern.
4239 * Return value: A new stmt that will be used to replace the pattern.
4241 Assuming size of TYPE is the same as size of all comparisons
 4242 (otherwise some casts would be added where needed), for the above
 4243 sequence we create the related pattern stmts:
4244 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4245 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4246 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4247 S5' e_T = c_T | d_T;
4248 S6' f_T = e_T;
4250 Instead of the above S3' we could emit:
4251 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4252 S3' c_T = a_T | b_T;
4253 but the above is more efficient. */
4255 static gimple *
4256 vect_recog_bool_pattern (vec_info *vinfo,
4257 stmt_vec_info stmt_vinfo, tree *type_out)
4259 gimple *last_stmt = stmt_vinfo->stmt;
4260 enum tree_code rhs_code;
4261 tree var, lhs, rhs, vectype;
4262 gimple *pattern_stmt;
4264 if (!is_gimple_assign (last_stmt))
4265 return NULL;
4267 var = gimple_assign_rhs1 (last_stmt);
4268 lhs = gimple_assign_lhs (last_stmt);
4269 rhs_code = gimple_assign_rhs_code (last_stmt);
4271 if (rhs_code == VIEW_CONVERT_EXPR)
4272 var = TREE_OPERAND (var, 0);
4274 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4275 return NULL;
4277 hash_set<gimple *> bool_stmts;
4279 if (CONVERT_EXPR_CODE_P (rhs_code)
4280 || rhs_code == VIEW_CONVERT_EXPR)
4282 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4283 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4284 return NULL;
4285 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4286 if (vectype == NULL_TREE)
4287 return NULL;
4289 if (check_bool_pattern (var, vinfo, bool_stmts))
4291 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4292 TREE_TYPE (lhs), stmt_vinfo);
4293 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4294 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4295 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4296 else
4297 pattern_stmt
4298 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4300 else
4302 tree type = integer_type_for_mask (var, vinfo);
4303 tree cst0, cst1, tmp;
4305 if (!type)
4306 return NULL;
 4308 /* We may directly use the cond with a narrowed type, performing a
 4309 single cond with a packed mask instead of multiple cond exprs
 4310 followed by result packing.  In the case of widening it is
 4311 better to do the cond first and then extract the
 4312 results.  */
4313 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4314 type = TREE_TYPE (lhs);
4316 cst0 = build_int_cst (type, 0);
4317 cst1 = build_int_cst (type, 1);
4318 tmp = vect_recog_temp_ssa_var (type, NULL);
4319 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4321 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4323 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4324 append_pattern_def_seq (vinfo, stmt_vinfo,
4325 pattern_stmt, new_vectype);
4327 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4328 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4332 *type_out = vectype;
4333 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4335 return pattern_stmt;
4337 else if (rhs_code == COND_EXPR
4338 && TREE_CODE (var) == SSA_NAME)
4340 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4341 if (vectype == NULL_TREE)
4342 return NULL;
4344 /* Build a scalar type for the boolean result that when
4345 vectorized matches the vector type of the result in
4346 size and number of elements. */
4347 unsigned prec
4348 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4349 TYPE_VECTOR_SUBPARTS (vectype));
4351 tree type
4352 = build_nonstandard_integer_type (prec,
4353 TYPE_UNSIGNED (TREE_TYPE (var)));
4354 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4355 return NULL;
4357 if (!check_bool_pattern (var, vinfo, bool_stmts))
4358 return NULL;
4360 rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4362 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4363 pattern_stmt
4364 = gimple_build_assign (lhs, COND_EXPR,
4365 build2 (NE_EXPR, boolean_type_node,
4366 rhs, build_int_cst (type, 0)),
4367 gimple_assign_rhs2 (last_stmt),
4368 gimple_assign_rhs3 (last_stmt));
4369 *type_out = vectype;
4370 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4372 return pattern_stmt;
4374 else if (rhs_code == SSA_NAME
4375 && STMT_VINFO_DATA_REF (stmt_vinfo))
4377 stmt_vec_info pattern_stmt_info;
4378 tree nunits_vectype;
4379 if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4380 &nunits_vectype)
4381 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4382 return NULL;
4384 if (check_bool_pattern (var, vinfo, bool_stmts))
4385 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4386 TREE_TYPE (vectype), stmt_vinfo);
4387 else
4389 tree type = integer_type_for_mask (var, vinfo);
4390 tree cst0, cst1, new_vectype;
4392 if (!type)
4393 return NULL;
4395 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4396 type = TREE_TYPE (vectype);
4398 cst0 = build_int_cst (type, 0);
4399 cst1 = build_int_cst (type, 1);
4400 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4402 rhs = vect_recog_temp_ssa_var (type, NULL);
4403 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4404 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4407 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4408 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4410 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4411 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4412 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4413 rhs = rhs2;
4415 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4416 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4417 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4418 *type_out = vectype;
4419 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4421 return pattern_stmt;
4423 else
4424 return NULL;
4428 /* A helper for vect_recog_mask_conversion_pattern. Build
4429 conversion of MASK to a type suitable for masking VECTYPE.
4430 Built statement gets required vectype and is appended to
4431 a pattern sequence of STMT_VINFO.
4433 Return converted mask. */
4435 static tree
4436 build_mask_conversion (vec_info *vinfo,
4437 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4439 gimple *stmt;
4440 tree masktype, tmp;
4442 masktype = truth_type_for (vectype);
4443 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4444 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4445 append_pattern_def_seq (vinfo, stmt_vinfo,
4446 stmt, masktype, TREE_TYPE (vectype));
4448 return tmp;
4452 /* Function vect_recog_mask_conversion_pattern
4454 Try to find statements which require boolean type
 4455 conversion.  Additional conversion statements are
4456 added to handle such cases. For example:
4458 bool m_1, m_2, m_3;
4459 int i_4, i_5;
4460 double d_6, d_7;
4461 char c_1, c_2, c_3;
4463 S1 m_1 = i_4 > i_5;
4464 S2 m_2 = d_6 < d_7;
4465 S3 m_3 = m_1 & m_2;
4466 S4 c_1 = m_3 ? c_2 : c_3;
4468 Will be transformed into:
4470 S1 m_1 = i_4 > i_5;
4471 S2 m_2 = d_6 < d_7;
4472 S3'' m_2' = (_Bool[bitsize=32])m_2
4473 S3' m_3' = m_1 & m_2';
4474 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4475 S4' c_1' = m_3'' ? c_2 : c_3; */
4477 static gimple *
4478 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4479 stmt_vec_info stmt_vinfo, tree *type_out)
4481 gimple *last_stmt = stmt_vinfo->stmt;
4482 enum tree_code rhs_code;
4483 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4484 tree vectype1, vectype2;
4485 stmt_vec_info pattern_stmt_info;
4486 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4487 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
 4489 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4490 if (is_gimple_call (last_stmt)
4491 && gimple_call_internal_p (last_stmt))
4493 gcall *pattern_stmt;
4495 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4496 int mask_argno = internal_fn_mask_index (ifn);
4497 if (mask_argno < 0)
4498 return NULL;
4500 bool store_p = internal_store_fn_p (ifn);
4501 if (store_p)
4503 int rhs_index = internal_fn_stored_value_index (ifn);
4504 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4505 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4507 else
4509 lhs = gimple_call_lhs (last_stmt);
4510 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4513 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4514 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4515 if (!mask_arg_type)
4516 return NULL;
4517 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4519 if (!vectype1 || !vectype2
4520 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4521 TYPE_VECTOR_SUBPARTS (vectype2)))
4522 return NULL;
4524 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4526 auto_vec<tree, 8> args;
4527 unsigned int nargs = gimple_call_num_args (last_stmt);
4528 args.safe_grow (nargs, true);
4529 for (unsigned int i = 0; i < nargs; ++i)
4530 args[i] = ((int) i == mask_argno
4531 ? tmp
4532 : gimple_call_arg (last_stmt, i));
4533 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4535 if (!store_p)
4537 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4538 gimple_call_set_lhs (pattern_stmt, lhs);
4540 gimple_call_set_nothrow (pattern_stmt, true);
4542 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4543 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4544 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4546 *type_out = vectype1;
4547 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4549 return pattern_stmt;
4552 if (!is_gimple_assign (last_stmt))
4553 return NULL;
4555 gimple *pattern_stmt;
4556 lhs = gimple_assign_lhs (last_stmt);
4557 rhs1 = gimple_assign_rhs1 (last_stmt);
4558 rhs_code = gimple_assign_rhs_code (last_stmt);
4560 /* Check for cond expression requiring mask conversion. */
4561 if (rhs_code == COND_EXPR)
4563 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4565 if (TREE_CODE (rhs1) == SSA_NAME)
4567 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4568 if (!rhs1_type)
4569 return NULL;
4571 else if (COMPARISON_CLASS_P (rhs1))
4573 /* Check whether we're comparing scalar booleans and (if so)
4574 whether a better mask type exists than the mask associated
4575 with boolean-sized elements. This avoids unnecessary packs
4576 and unpacks if the booleans are set from comparisons of
4577 wider types. E.g. in:
 4579 int x1, x2, x3, x4, y1, y2;
4581 bool b1 = (x1 == x2);
4582 bool b2 = (x3 == x4);
4583 ... = b1 == b2 ? y1 : y2;
4585 it is better for b1 and b2 to use the mask type associated
 4586 with int elements rather than bool (byte) elements.  */
4587 rhs1_op0 = TREE_OPERAND (rhs1, 0);
4588 rhs1_op1 = TREE_OPERAND (rhs1, 1);
4589 if (!rhs1_op0 || !rhs1_op1)
4590 return NULL;
4591 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4592 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4594 if (!rhs1_op0_type)
4595 rhs1_type = TREE_TYPE (rhs1_op0);
4596 else if (!rhs1_op1_type)
4597 rhs1_type = TREE_TYPE (rhs1_op1);
4598 else if (TYPE_PRECISION (rhs1_op0_type)
4599 != TYPE_PRECISION (rhs1_op1_type))
4601 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4602 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4603 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4604 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4605 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4607 if (abs (tmp0) > abs (tmp1))
4608 rhs1_type = rhs1_op1_type;
4609 else
4610 rhs1_type = rhs1_op0_type;
4612 else
4613 rhs1_type = build_nonstandard_integer_type
4614 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4616 else
4617 rhs1_type = rhs1_op0_type;
4619 else
4620 return NULL;
4622 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4624 if (!vectype1 || !vectype2)
4625 return NULL;
4627 /* Continue if a conversion is needed. Also continue if we have
4628 a comparison whose vector type would normally be different from
4629 VECTYPE2 when considered in isolation. In that case we'll
4630 replace the comparison with an SSA name (so that we can record
4631 its vector type) and behave as though the comparison was an SSA
4632 name from the outset. */
4633 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4634 TYPE_VECTOR_SUBPARTS (vectype2))
4635 && !rhs1_op0_type
4636 && !rhs1_op1_type)
4637 return NULL;
 4639 /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
 4640 in place; we can handle it in vectorizable_condition.  This avoids
 4641 unnecessary promotion stmts and an increased vectorization factor.  */
4642 if (COMPARISON_CLASS_P (rhs1)
4643 && INTEGRAL_TYPE_P (rhs1_type)
4644 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4645 TYPE_VECTOR_SUBPARTS (vectype2)))
4647 enum vect_def_type dt;
4648 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4649 && dt == vect_external_def
4650 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4651 && (dt == vect_external_def
4652 || dt == vect_constant_def))
4654 tree wide_scalar_type = build_nonstandard_integer_type
4655 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4656 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4657 wide_scalar_type);
4658 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4659 return NULL;
4663 /* If rhs1 is a comparison we need to move it into a
4664 separate statement. */
4665 if (TREE_CODE (rhs1) != SSA_NAME)
4667 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4668 if (rhs1_op0_type
4669 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4670 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4671 vectype2, stmt_vinfo);
4672 if (rhs1_op1_type
4673 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4674 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4675 vectype2, stmt_vinfo);
4676 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4677 rhs1_op0, rhs1_op1);
4678 rhs1 = tmp;
4679 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4680 rhs1_type);
4683 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4684 TYPE_VECTOR_SUBPARTS (vectype2)))
4685 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4686 else
4687 tmp = rhs1;
4689 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4690 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4691 gimple_assign_rhs2 (last_stmt),
4692 gimple_assign_rhs3 (last_stmt));
4694 *type_out = vectype1;
4695 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4697 return pattern_stmt;
4700 /* Now check for binary boolean operations requiring conversion for
4701 one of operands. */
4702 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4703 return NULL;
4705 if (rhs_code != BIT_IOR_EXPR
4706 && rhs_code != BIT_XOR_EXPR
4707 && rhs_code != BIT_AND_EXPR
4708 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4709 return NULL;
4711 rhs2 = gimple_assign_rhs2 (last_stmt);
4713 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4714 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4716 if (!rhs1_type || !rhs2_type
4717 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4718 return NULL;
4720 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4722 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4723 if (!vectype1)
4724 return NULL;
4725 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4727 else
4729 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4730 if (!vectype1)
4731 return NULL;
4732 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4735 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4736 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4738 *type_out = vectype1;
4739 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4741 return pattern_stmt;
4744 /* STMT_INFO is a load or store. If the load or store is conditional, return
4745 the boolean condition under which it occurs, otherwise return null. */
4747 static tree
4748 vect_get_load_store_mask (stmt_vec_info stmt_info)
4750 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4752 gcc_assert (gimple_assign_single_p (def_assign));
4753 return NULL_TREE;
4756 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4758 internal_fn ifn = gimple_call_internal_fn (def_call);
4759 int mask_index = internal_fn_mask_index (ifn);
4760 return gimple_call_arg (def_call, mask_index);
4763 gcc_unreachable ();
4766 /* Return MASK if MASK is suitable for masking an operation on vectors
4767 of type VECTYPE, otherwise convert it into such a form and return
4768 the result. Associate any conversion statements with STMT_INFO's
4769 pattern. */
4771 static tree
4772 vect_convert_mask_for_vectype (tree mask, tree vectype,
4773 stmt_vec_info stmt_info, vec_info *vinfo)
4775 tree mask_type = integer_type_for_mask (mask, vinfo);
4776 if (mask_type)
4778 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4779 if (mask_vectype
4780 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4781 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4782 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4784 return mask;
4787 /* Return the equivalent of:
4789 fold_convert (TYPE, VALUE)
4791 with the expectation that the operation will be vectorized.
4792 If new statements are needed, add them as pattern statements
4793 to STMT_INFO. */
4795 static tree
4796 vect_add_conversion_to_pattern (vec_info *vinfo,
4797 tree type, tree value, stmt_vec_info stmt_info)
4799 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4800 return value;
4802 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4803 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4804 append_pattern_def_seq (vinfo, stmt_info, conversion,
4805 get_vectype_for_scalar_type (vinfo, type));
4806 return new_value;
4809 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4810 internal function. Return the final statement on success and set
4811 *TYPE_OUT to the vector type being loaded or stored.
4813 This function only handles gathers and scatters that were recognized
4814 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
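/* As an illustrative sketch (dump syntax only; the exact internal
   function is taken from gs_info.ifn): a conditional gather such as

     if (c[i]) ... = a[idx[i]];

   becomes roughly

     _1 = .MASK_GATHER_LOAD (base, offset, scale, 0, mask);

   while the unconditional and scatter-store forms take four or five
   arguments built the same way below.  */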
4816 static gimple *
4817 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4818 stmt_vec_info stmt_info, tree *type_out)
4820 /* Currently we only support this for loop vectorization. */
4821 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4822 if (!loop_vinfo)
4823 return NULL;
4825 /* Make sure that we're looking at a gather load or scatter store. */
4826 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4827 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4828 return NULL;
4830 /* Get the boolean that controls whether the load or store happens.
4831 This is null if the operation is unconditional. */
4832 tree mask = vect_get_load_store_mask (stmt_info);
4834 /* Make sure that the target supports an appropriate internal
4835 function for the gather/scatter operation. */
4836 gather_scatter_info gs_info;
4837 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4838 || gs_info.ifn == IFN_LAST)
4839 return NULL;
4841 /* Convert the mask to the right form. */
4842 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4843 gs_info.element_type);
4844 if (mask)
4845 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4846 loop_vinfo);
4847 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
4848 || gs_info.ifn == IFN_MASK_GATHER_LOAD)
4849 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
4851 /* Get the invariant base and non-invariant offset, converting the
4852 latter to the same width as the vector elements. */
4853 tree base = gs_info.base;
4854 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4855 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4856 gs_info.offset, stmt_info);
4858 /* Build the new pattern statement. */
4859 tree scale = size_int (gs_info.scale);
4860 gcall *pattern_stmt;
4861 if (DR_IS_READ (dr))
4863 tree zero = build_zero_cst (gs_info.element_type);
4864 if (mask != NULL)
4865 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4866 offset, scale, zero, mask);
4867 else
4868 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4869 offset, scale, zero);
4870 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4871 gimple_call_set_lhs (pattern_stmt, load_lhs);
4873 else
4875 tree rhs = vect_get_store_rhs (stmt_info);
4876 if (mask != NULL)
4877 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
4878 base, offset, scale, rhs,
4879 mask);
4880 else
4881 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
4882 base, offset, scale, rhs);
4884 gimple_call_set_nothrow (pattern_stmt, true);
4886 /* Copy across relevant vectorization info and associate DR with the
4887 new pattern statement instead of the original statement. */
4888 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4889 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4891 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4892 *type_out = vectype;
4893 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4895 return pattern_stmt;
4898 /* Return true if TYPE is a non-boolean integer type. These are the types
4899 that we want to consider for narrowing. */
4901 static bool
4902 vect_narrowable_type_p (tree type)
4904 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4907 /* Return true if the operation given by CODE can be truncated to N bits
4908 when only N bits of the output are needed. This is only true if bit N+1
4909 of the inputs has no effect on the low N bits of the result. */
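/* For example, PLUS_EXPR and MULT_EXPR qualify: the low 8 bits of a
   32-bit addition or multiplication depend only on the low 8 bits of
   the inputs.  RSHIFT_EXPR or TRUNC_DIV_EXPR do not, because higher
   input bits can affect the low output bits.  */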
4911 static bool
4912 vect_truncatable_operation_p (tree_code code)
4914 switch (code)
4916 case PLUS_EXPR:
4917 case MINUS_EXPR:
4918 case MULT_EXPR:
4919 case BIT_AND_EXPR:
4920 case BIT_IOR_EXPR:
4921 case BIT_XOR_EXPR:
4922 case COND_EXPR:
4923 return true;
4925 default:
4926 return false;
4930 /* Record that STMT_INFO could be changed from operating on TYPE to
4931 operating on a type with the precision and sign given by PRECISION
4932 and SIGN respectively. PRECISION is an arbitrary bit precision;
4933 it might not be a whole number of bytes. */
4935 static void
4936 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4937 unsigned int precision, signop sign)
4939 /* Round the precision up to a whole number of bytes. */
4940 precision = vect_element_precision (precision);
4941 if (precision < TYPE_PRECISION (type)
4942 && (!stmt_info->operation_precision
4943 || stmt_info->operation_precision > precision))
4945 stmt_info->operation_precision = precision;
4946 stmt_info->operation_sign = sign;
4950 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4951 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4952 is an arbitrary bit precision; it might not be a whole number of bytes. */
4954 static void
4955 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4956 unsigned int min_input_precision)
4958 /* This operation in isolation only requires the inputs to have
 4959 MIN_INPUT_PRECISION of precision.  However, that doesn't mean
4960 that MIN_INPUT_PRECISION is a natural precision for the chain
4961 as a whole. E.g. consider something like:
4963 unsigned short *x, *y;
4964 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4966 The right shift can be done on unsigned chars, and only requires the
4967 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4968 approach would mean turning a natural chain of single-vector unsigned
4969 short operations into one that truncates "*x" and then extends
4970 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4971 operation and one vector for each unsigned char operation.
4972 This would be a significant pessimization.
4974 Instead only propagate the maximum of this precision and the precision
4975 required by the users of the result. This means that we don't pessimize
4976 the case above but continue to optimize things like:
4978 unsigned char *y;
4979 unsigned short *x;
4980 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4982 Here we would truncate two vectors of *x to a single vector of
4983 unsigned chars and use single-vector unsigned char operations for
4984 everything else, rather than doing two unsigned short copies of
4985 "(*x & 0xf0) >> 4" and then truncating the result. */
4986 min_input_precision = MAX (min_input_precision,
4987 stmt_info->min_output_precision);
4989 if (min_input_precision < TYPE_PRECISION (type)
4990 && (!stmt_info->min_input_precision
4991 || stmt_info->min_input_precision > min_input_precision))
4992 stmt_info->min_input_precision = min_input_precision;
4995 /* Subroutine of vect_determine_min_output_precision. Return true if
4996 we can calculate a reduced number of output bits for STMT_INFO,
4997 whose result is LHS. */
4999 static bool
5000 vect_determine_min_output_precision_1 (vec_info *vinfo,
5001 stmt_vec_info stmt_info, tree lhs)
5003 /* Take the maximum precision required by users of the result. */
5004 unsigned int precision = 0;
5005 imm_use_iterator iter;
5006 use_operand_p use;
5007 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
5009 gimple *use_stmt = USE_STMT (use);
5010 if (is_gimple_debug (use_stmt))
5011 continue;
5012 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
5013 if (!use_stmt_info || !use_stmt_info->min_input_precision)
5014 return false;
5015 /* The input precision recorded for COND_EXPRs applies only to the
5016 "then" and "else" values. */
5017 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5018 if (assign
5019 && gimple_assign_rhs_code (assign) == COND_EXPR
5020 && use->use != gimple_assign_rhs2_ptr (assign)
5021 && use->use != gimple_assign_rhs3_ptr (assign))
5022 return false;
5023 precision = MAX (precision, use_stmt_info->min_input_precision);
5026 if (dump_enabled_p ())
5027 dump_printf_loc (MSG_NOTE, vect_location,
5028 "only the low %d bits of %T are significant\n",
5029 precision, lhs);
5030 stmt_info->min_output_precision = precision;
5031 return true;
5034 /* Calculate min_output_precision for STMT_INFO. */
5036 static void
5037 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5039 /* We're only interested in statements with a narrowable result. */
5040 tree lhs = gimple_get_lhs (stmt_info->stmt);
5041 if (!lhs
5042 || TREE_CODE (lhs) != SSA_NAME
5043 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5044 return;
5046 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5047 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5050 /* Use range information to decide whether STMT (described by STMT_INFO)
5051 could be done in a narrower type. This is effectively a forward
5052 propagation, since it uses context-independent information that applies
5053 to all users of an SSA name. */
5055 static void
5056 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
5058 tree lhs = gimple_assign_lhs (stmt);
5059 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
5060 return;
5062 tree type = TREE_TYPE (lhs);
5063 if (!vect_narrowable_type_p (type))
5064 return;
5066 /* First see whether we have any useful range information for the result. */
5067 unsigned int precision = TYPE_PRECISION (type);
5068 signop sign = TYPE_SIGN (type);
5069 wide_int min_value, max_value;
5070 if (!vect_get_range_info (lhs, &min_value, &max_value))
5071 return;
5073 tree_code code = gimple_assign_rhs_code (stmt);
5074 unsigned int nops = gimple_num_ops (stmt);
5076 if (!vect_truncatable_operation_p (code))
5077 /* Check that all relevant input operands are compatible, and update
5078 [MIN_VALUE, MAX_VALUE] to include their ranges. */
5079 for (unsigned int i = 1; i < nops; ++i)
5081 tree op = gimple_op (stmt, i);
5082 if (TREE_CODE (op) == INTEGER_CST)
5084 /* Don't require the integer to have RHS_TYPE (which it might
5085 not for things like shift amounts, etc.), but do require it
5086 to fit the type. */
5087 if (!int_fits_type_p (op, type))
5088 return;
5090 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
5091 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
5093 else if (TREE_CODE (op) == SSA_NAME)
5095 /* Ignore codes that don't take uniform arguments. */
5096 if (!types_compatible_p (TREE_TYPE (op), type))
5097 return;
5099 wide_int op_min_value, op_max_value;
5100 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
5101 return;
5103 min_value = wi::min (min_value, op_min_value, sign);
5104 max_value = wi::max (max_value, op_max_value, sign);
5106 else
5107 return;
5110 /* Try to switch signed types for unsigned types if we can.
5111 This is better for two reasons. First, unsigned ops tend
5112 to be cheaper than signed ops. Second, it means that we can
5113 handle things like:
5115 signed char c;
5116 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
5120 signed char c;
5121 unsigned short res_1 = (unsigned short) c & 0xff00;
5122 int res = (int) res_1;
5124 where the intermediate result res_1 has unsigned rather than
5125 signed type. */
5126 if (sign == SIGNED && !wi::neg_p (min_value))
5127 sign = UNSIGNED;
5129 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
5130 unsigned int precision1 = wi::min_precision (min_value, sign);
5131 unsigned int precision2 = wi::min_precision (max_value, sign);
5132 unsigned int value_precision = MAX (precision1, precision2);
5133 if (value_precision >= precision)
5134 return;
5136 if (dump_enabled_p ())
5137 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5138 " without loss of precision: %G",
5139 sign == SIGNED ? "signed" : "unsigned",
5140 value_precision, stmt);
5142 vect_set_operation_type (stmt_info, type, value_precision, sign);
5143 vect_set_min_input_precision (stmt_info, type, value_precision);
5146 /* Use information about the users of STMT's result to decide whether
5147 STMT (described by STMT_INFO) could be done in a narrower type.
5148 This is effectively a backward propagation. */
5150 static void
5151 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
5153 tree_code code = gimple_assign_rhs_code (stmt);
5154 unsigned int opno = (code == COND_EXPR ? 2 : 1);
5155 tree type = TREE_TYPE (gimple_op (stmt, opno));
5156 if (!vect_narrowable_type_p (type))
5157 return;
5159 unsigned int precision = TYPE_PRECISION (type);
5160 unsigned int operation_precision, min_input_precision;
5161 switch (code)
5163 CASE_CONVERT:
5164 /* Only the bits that contribute to the output matter. Don't change
5165 the precision of the operation itself. */
5166 operation_precision = precision;
5167 min_input_precision = stmt_info->min_output_precision;
5168 break;
5170 case LSHIFT_EXPR:
5171 case RSHIFT_EXPR:
5173 tree shift = gimple_assign_rhs2 (stmt);
5174 if (TREE_CODE (shift) != INTEGER_CST
5175 || !wi::ltu_p (wi::to_widest (shift), precision))
5176 return;
5177 unsigned int const_shift = TREE_INT_CST_LOW (shift);
5178 if (code == LSHIFT_EXPR)
5180 /* Avoid creating an undefined shift.
5182 ??? We could instead use min_output_precision as-is and
5183 optimize out-of-range shifts to zero. However, only
5184 degenerate testcases shift away all their useful input data,
5185 and it isn't natural to drop input operations in the middle
5186 of vectorization. This sort of thing should really be
5187 handled before vectorization. */
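/* For example (illustrative numbers only), with min_output_precision 8
   and const_shift 3 the computation below gives operation_precision
   MAX (8, 3 + 1) = 8 and min_input_precision MAX (8, 3) - 3 = 5.  */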
5188 operation_precision = MAX (stmt_info->min_output_precision,
5189 const_shift + 1);
5190 /* We need CONST_SHIFT fewer bits of the input. */
5191 min_input_precision = (MAX (operation_precision, const_shift)
5192 - const_shift);
5194 else
5196 /* We need CONST_SHIFT extra bits to do the operation. */
5197 operation_precision = (stmt_info->min_output_precision
5198 + const_shift);
5199 min_input_precision = operation_precision;
5201 break;
5204 default:
5205 if (vect_truncatable_operation_p (code))
5207 /* Input bit N has no effect on output bits N-1 and lower. */
5208 operation_precision = stmt_info->min_output_precision;
5209 min_input_precision = operation_precision;
5210 break;
5212 return;
5215 if (operation_precision < precision)
5217 if (dump_enabled_p ())
5218 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5219 " without affecting users: %G",
5220 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5221 operation_precision, stmt);
5222 vect_set_operation_type (stmt_info, type, operation_precision,
5223 TYPE_SIGN (type));
5225 vect_set_min_input_precision (stmt_info, type, min_input_precision);
5228 /* Return true if the statement described by STMT_INFO sets a boolean
5229 SSA_NAME and if we know how to vectorize this kind of statement using
5230 vector mask types. */
5232 static bool
5233 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5235 tree lhs = gimple_get_lhs (stmt_info->stmt);
5236 if (!lhs
5237 || TREE_CODE (lhs) != SSA_NAME
5238 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5239 return false;
5241 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5243 tree_code rhs_code = gimple_assign_rhs_code (assign);
5244 switch (rhs_code)
5246 CASE_CONVERT:
5247 case SSA_NAME:
5248 case BIT_NOT_EXPR:
5249 case BIT_IOR_EXPR:
5250 case BIT_XOR_EXPR:
5251 case BIT_AND_EXPR:
5252 return true;
5254 default:
5255 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5258 else if (is_a <gphi *> (stmt_info->stmt))
5259 return true;
5260 return false;
5263 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5264 a vector mask type instead of a normal vector type. Record the
5265 result in STMT_INFO->mask_precision. */
5267 static void
5268 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5270 if (!possible_vector_mask_operation_p (stmt_info))
5271 return;
5273 /* If at least one boolean input uses a vector mask type,
5274 pick the mask type with the narrowest elements.
5276 ??? This is the traditional behavior. It should always produce
5277 the smallest number of operations, but isn't necessarily the
5278 optimal choice. For example, if we have:
5280 a = b & c
5282 where:
5284 - the user of a wants it to have a mask type for 16-bit elements (M16)
5285 - b also uses M16
5286 - c uses a mask type for 8-bit elements (M8)
5288 then picking M8 gives:
5290 - 1 M16->M8 pack for b
5291 - 1 M8 AND for a
5292 - 2 M8->M16 unpacks for the user of a
5294 whereas picking M16 would have given:
5296 - 2 M8->M16 unpacks for c
5297 - 2 M16 ANDs for a
5299 The number of operations is the same, but M16 would have given
5300 a shorter dependency chain and allowed more ILP. */
5301 unsigned int precision = ~0U;
5302 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5304 unsigned int nops = gimple_num_ops (assign);
5305 for (unsigned int i = 1; i < nops; ++i)
5307 tree rhs = gimple_op (assign, i);
5308 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5309 continue;
5311 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5312 if (!def_stmt_info)
5313 /* Don't let external or constant operands influence the choice.
5314 We can convert them to whichever vector type we pick. */
5315 continue;
5317 if (def_stmt_info->mask_precision)
5319 if (precision > def_stmt_info->mask_precision)
5320 precision = def_stmt_info->mask_precision;
5324 /* If the statement compares two values that shouldn't use vector masks,
5325 try comparing the values as normal scalars instead. */
5326 tree_code rhs_code = gimple_assign_rhs_code (assign);
5327 if (precision == ~0U
5328 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5330 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5331 scalar_mode mode;
5332 tree vectype, mask_type;
5333 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5334 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5335 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5336 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5337 precision = GET_MODE_BITSIZE (mode);
5340 else
5342 gphi *phi = as_a <gphi *> (stmt_info->stmt);
5343 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5345 tree rhs = gimple_phi_arg_def (phi, i);
5347 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5348 if (!def_stmt_info)
5349 /* Don't let external or constant operands influence the choice.
5350 We can convert them to whichever vector type we pick. */
5351 continue;
5353 if (def_stmt_info->mask_precision)
5355 if (precision > def_stmt_info->mask_precision)
5356 precision = def_stmt_info->mask_precision;
5361 if (dump_enabled_p ())
5363 if (precision == ~0U)
5364 dump_printf_loc (MSG_NOTE, vect_location,
5365 "using normal nonmask vectors for %G",
5366 stmt_info->stmt);
5367 else
5368 dump_printf_loc (MSG_NOTE, vect_location,
5369 "using boolean precision %d for %G",
5370 precision, stmt_info->stmt);
5373 stmt_info->mask_precision = precision;
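/* Illustration only, not part of GCC: a hypothetical scalar loop (the
   name "mask_precision_example" is made up) whose comparisons produce
   boolean values.  Both comparisons operate on 8-bit data, so the
   analysis above can give the BIT_AND of the two masks a narrow 8-bit
   mask precision rather than a wider one.  */

void
mask_precision_example (unsigned char *restrict out,
                        const unsigned char *restrict a,
                        const unsigned char *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (a[i] > 10) & (b[i] < 200);
}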
5376 /* Handle vect_determine_precisions for STMT_INFO, given that we
5377 have already done so for the users of its result. */
5379 void
5380 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5382 vect_determine_min_output_precision (vinfo, stmt_info);
5383 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5385 vect_determine_precisions_from_range (stmt_info, stmt);
5386 vect_determine_precisions_from_users (stmt_info, stmt);
5390 /* Walk backwards through the vectorizable region to determine the
5391 values of these fields:
5393 - min_output_precision
5394 - min_input_precision
5395 - operation_precision
5396 - operation_sign. */
5398 void
5399 vect_determine_precisions (vec_info *vinfo)
5401 DUMP_VECT_SCOPE ("vect_determine_precisions");
5403 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5405 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5406 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5407 unsigned int nbbs = loop->num_nodes;
5409 for (unsigned int i = 0; i < nbbs; i++)
5411 basic_block bb = bbs[i];
5412 for (auto gsi = gsi_start_phis (bb);
5413 !gsi_end_p (gsi); gsi_next (&gsi))
5415 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5416 if (stmt_info)
5417 vect_determine_mask_precision (vinfo, stmt_info);
5419 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5420 if (!is_gimple_debug (gsi_stmt (si)))
5421 vect_determine_mask_precision
5422 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5424 for (unsigned int i = 0; i < nbbs; i++)
5426 basic_block bb = bbs[nbbs - i - 1];
5427 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5428 !gsi_end_p (si); gsi_prev (&si))
5429 if (!is_gimple_debug (gsi_stmt (si)))
5430 vect_determine_stmt_precisions
5431 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5432 for (auto gsi = gsi_start_phis (bb);
5433 !gsi_end_p (gsi); gsi_next (&gsi))
5435 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5436 if (stmt_info)
5437 vect_determine_stmt_precisions (vinfo, stmt_info);
5441 else
5443 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5444 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5446 basic_block bb = bb_vinfo->bbs[i];
5447 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5449 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5450 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5451 vect_determine_mask_precision (vinfo, stmt_info);
5453 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5455 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5456 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5457 vect_determine_mask_precision (vinfo, stmt_info);
5460 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5462 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5463 !gsi_end_p (gsi); gsi_prev (&gsi))
5465 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5466 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5467 vect_determine_stmt_precisions (vinfo, stmt_info);
5469 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5470 !gsi_end_p (gsi); gsi_next (&gsi))
5472 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5473 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5474 vect_determine_stmt_precisions (vinfo, stmt_info);
5480 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5482 struct vect_recog_func
5484 vect_recog_func_ptr fn;
5485 const char *name;
5488 /* Note that ordering matters - the first pattern matching on a stmt is
5489 taken, which means that usually the more complex pattern needs to precede
5490 the less complex ones (widen_sum only after dot_prod or sad, for example). */
5491 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5492 { vect_recog_over_widening_pattern, "over_widening" },
5493 /* Must come after over_widening, which narrows the shift as much as
5494 possible beforehand. */
5495 { vect_recog_average_pattern, "average" },
5496 { vect_recog_mulhs_pattern, "mult_high" },
5497 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5498 { vect_recog_widen_mult_pattern, "widen_mult" },
5499 { vect_recog_dot_prod_pattern, "dot_prod" },
5500 { vect_recog_sad_pattern, "sad" },
5501 { vect_recog_widen_sum_pattern, "widen_sum" },
5502 { vect_recog_pow_pattern, "pow" },
5503 { vect_recog_popcount_pattern, "popcount" },
5504 { vect_recog_widen_shift_pattern, "widen_shift" },
5505 { vect_recog_rotate_pattern, "rotate" },
5506 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5507 { vect_recog_divmod_pattern, "divmod" },
5508 { vect_recog_mult_pattern, "mult" },
5509 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5510 { vect_recog_bool_pattern, "bool" },
5511 /* This must come before mask conversion, and includes the parts
5512 of mask conversion that are needed for gather and scatter
5513 internal functions. */
5514 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5515 { vect_recog_mask_conversion_pattern, "mask_conversion" },
5516 { vect_recog_widen_plus_pattern, "widen_plus" },
5517 { vect_recog_widen_minus_pattern, "widen_minus" },
5520 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
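/* Illustration only, not part of GCC: a hypothetical scalar loop (the
   name "avg_round_u8" is made up) showing why the table order matters.
   The arithmetic below is widened to int by the front end; over_widening
   first narrows it, and only then can the average pattern recognize the
   rounding-average idiom (e.g. as an AVG_CEIL internal function call).  */

void
avg_round_u8 (unsigned char *restrict out, const unsigned char *restrict a,
              const unsigned char *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (a[i] + b[i] + 1) >> 1;
}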
5522 /* Mark statements that are involved in a pattern. */
5524 void
5525 vect_mark_pattern_stmts (vec_info *vinfo,
5526 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5527 tree pattern_vectype)
5529 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5530 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5532 gimple *orig_pattern_stmt = NULL;
5533 if (is_pattern_stmt_p (orig_stmt_info))
5535 /* We're replacing a statement in an existing pattern definition
5536 sequence. */
5537 orig_pattern_stmt = orig_stmt_info->stmt;
5538 if (dump_enabled_p ())
5539 dump_printf_loc (MSG_NOTE, vect_location,
5540 "replacing earlier pattern %G", orig_pattern_stmt);
5542 /* To keep the book-keeping simple, just swap the lhs of the
5543 old and new statements, so that the old one has a valid but
5544 unused lhs. */
5545 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5546 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5547 gimple_set_lhs (pattern_stmt, old_lhs);
5549 if (dump_enabled_p ())
5550 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5552 /* Switch to the statement that ORIG replaces. */
5553 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5555 /* We shouldn't be replacing the main pattern statement. */
5556 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5557 != orig_pattern_stmt);
5560 if (def_seq)
5561 for (gimple_stmt_iterator si = gsi_start (def_seq);
5562 !gsi_end_p (si); gsi_next (&si))
5564 if (dump_enabled_p ())
5565 dump_printf_loc (MSG_NOTE, vect_location,
5566 "extra pattern stmt: %G", gsi_stmt (si));
5567 stmt_vec_info pattern_stmt_info
5568 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5569 orig_stmt_info, pattern_vectype);
5570 /* Stmts in the def sequence are not vectorizable cycle or
5571 induction defs; instead they should all be vect_internal_def
5572 feeding the main pattern stmt, which retains this def type. */
5573 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5576 if (orig_pattern_stmt)
5578 vect_init_pattern_stmt (vinfo, pattern_stmt,
5579 orig_stmt_info, pattern_vectype);
5581 /* Insert all the new pattern statements before the original one. */
5582 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5583 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5584 orig_def_seq);
5585 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5586 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5588 /* Remove the pattern statement that this new pattern replaces. */
5589 gsi_remove (&gsi, false);
5591 else
5592 vect_set_pattern_stmt (vinfo,
5593 pattern_stmt, orig_stmt_info, pattern_vectype);
5595 /* Transfer reduction path info to the pattern. */
5596 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5598 tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
5599 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
5600 /* Search the pattern def sequence and the main pattern stmt. Note
5601 we may have inserted all into a containing pattern def sequence
5602 so the following is a bit awkward. */
5603 gimple_stmt_iterator si;
5604 gimple *s;
5605 if (def_seq)
5607 si = gsi_start (def_seq);
5608 s = gsi_stmt (si);
5609 gsi_next (&si);
5611 else
5613 si = gsi_none ();
5614 s = pattern_stmt;
5618 bool found = false;
5619 for (unsigned i = 1; i < gimple_num_ops (s); ++i)
5620 if (gimple_op (s, i) == lookfor)
5622 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
5623 lookfor = gimple_get_lhs (s);
5624 found = true;
5625 break;
5627 if (s == pattern_stmt)
5629 if (!found && dump_enabled_p ())
5630 dump_printf_loc (MSG_NOTE, vect_location,
5631 "failed to update reduction index.\n");
5632 break;
5634 if (gsi_end_p (si))
5635 s = pattern_stmt;
5636 else
5638 s = gsi_stmt (si);
5639 if (s == pattern_stmt)
5640 /* Found the end inside a bigger pattern def seq. */
5641 si = gsi_none ();
5642 else
5643 gsi_next (&si);
5645 } while (1);
5649 /* Function vect_pattern_recog_1
5651 Input:
5652 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5653 computation pattern.
5654 STMT_INFO: A stmt from which the pattern search should start.
5656 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5657 a sequence of statements that has the same functionality and can be
5658 used to replace STMT_INFO. It returns the last statement in the sequence
5659 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5660 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5661 statement, having first checked that the target supports the new operation
5662 in that type.
5664 This function also does some bookkeeping, as explained in the documentation
5665 for vect_pattern_recog. */
5667 static void
5668 vect_pattern_recog_1 (vec_info *vinfo,
5669 vect_recog_func *recog_func, stmt_vec_info stmt_info)
5671 gimple *pattern_stmt;
5672 loop_vec_info loop_vinfo;
5673 tree pattern_vectype;
5675 /* If this statement has already been replaced with pattern statements,
5676 leave the original statement alone, since the first match wins.
5677 Instead try to match against the definition statements that feed
5678 the main pattern statement. */
5679 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5681 gimple_stmt_iterator gsi;
5682 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5683 !gsi_end_p (gsi); gsi_next (&gsi))
5684 vect_pattern_recog_1 (vinfo, recog_func,
5685 vinfo->lookup_stmt (gsi_stmt (gsi)));
5686 return;
5689 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5690 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5691 if (!pattern_stmt)
5693 /* Clear any half-formed pattern definition sequence. */
5694 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5695 return;
5698 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5699 gcc_assert (pattern_vectype);
5701 /* Found a vectorizable pattern. */
5702 if (dump_enabled_p ())
5703 dump_printf_loc (MSG_NOTE, vect_location,
5704 "%s pattern recognized: %G",
5705 recog_func->name, pattern_stmt);
5707 /* Mark the stmts that are involved in the pattern. */
5708 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5710 /* Patterns cannot be vectorized using SLP, because they change the order of
5711 computation. */
5712 if (loop_vinfo)
5714 unsigned ix, ix2;
5715 stmt_vec_info *elem_ptr;
5716 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5717 elem_ptr, *elem_ptr == stmt_info);
5722 /* Function vect_pattern_recog
5724 Input:
5725 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
5726 computation idioms.
5728 Output - for each computation idiom that is detected we create a new stmt
5729 that provides the same functionality and that can be vectorized. We
5730 also record some information in the stmt_vec_info of the relevant
5731 stmts, as explained below:
5733 At the entry to this function we have the following stmts, with the
5734 following initial value in the STMT_VINFO fields:
5736 stmt in_pattern_p related_stmt vec_stmt
5737 S1: a_i = .... - - -
5738 S2: a_2 = ..use(a_i).. - - -
5739 S3: a_1 = ..use(a_2).. - - -
5740 S4: a_0 = ..use(a_1).. - - -
5741 S5: ... = ..use(a_0).. - - -
5743 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5744 represented by a single stmt. We then:
5745 - create a new stmt S6 equivalent to the pattern (the stmt is not
5746 inserted into the code)
5747 - fill in the STMT_VINFO fields as follows:
5749 in_pattern_p related_stmt vec_stmt
5750 S1: a_i = .... - - -
5751 S2: a_2 = ..use(a_i).. - - -
5752 S3: a_1 = ..use(a_2).. - - -
5753 S4: a_0 = ..use(a_1).. true S6 -
5754 '---> S6: a_new = .... - S4 -
5755 S5: ... = ..use(a_0).. - - -
5757 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5758 to each other through the RELATED_STMT field).
5760 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5761 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5762 remain irrelevant unless used by stmts other than S4.
5764 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5765 (because they are marked as irrelevant). It will vectorize S6, and record
5766 a pointer to the new vector stmt VS6 from S6 (as usual).
5767 S4 will be skipped, and S5 will be vectorized as usual:
5769 in_pattern_p related_stmt vec_stmt
5770 S1: a_i = .... - - -
5771 S2: a_2 = ..use(a_i).. - - -
5772 S3: a_1 = ..use(a_2).. - - -
5773 > VS6: va_new = .... - - -
5774 S4: a_0 = ..use(a_1).. true S6 VS6
5775 '---> S6: a_new = .... - S4 VS6
5776 > VS5: ... = ..vuse(va_new).. - - -
5777 S5: ... = ..use(a_0).. - - -
5779 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5780 elsewhere), and we'll end up with:
5782 VS6: va_new = ....
5783 VS5: ... = ..vuse(va_new)..
5785 In case of more than one pattern statements, e.g., widen-mult with
5786 intermediate type:
5788 S1 a_t = ;
5789 S2 a_T = (TYPE) a_t;
5790 '--> S3: a_it = (interm_type) a_t;
5791 S4 prod_T = a_T * CONST;
5792 '--> S5: prod_T' = a_it w* CONST;
5794 there may be other users of a_T outside the pattern. In that case S2 will
5795 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5796 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5797 be recorded in S3. */
5799 void
5800 vect_pattern_recog (vec_info *vinfo)
5802 class loop *loop;
5803 basic_block *bbs;
5804 unsigned int nbbs;
5805 gimple_stmt_iterator si;
5806 unsigned int i, j;
5808 vect_determine_precisions (vinfo);
5810 DUMP_VECT_SCOPE ("vect_pattern_recog");
5812 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5814 loop = LOOP_VINFO_LOOP (loop_vinfo);
5815 bbs = LOOP_VINFO_BBS (loop_vinfo);
5816 nbbs = loop->num_nodes;
5818 /* Scan through the loop stmts, applying the pattern recognition
5819 functions starting at each stmt visited: */
5820 for (i = 0; i < nbbs; i++)
5822 basic_block bb = bbs[i];
5823 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5825 if (is_gimple_debug (gsi_stmt (si)))
5826 continue;
5827 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5828 /* Scan over all generic vect_recog_xxx_pattern functions. */
5829 for (j = 0; j < NUM_PATTERNS; j++)
5830 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
5831 stmt_info);
5835 else
5837 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5838 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5839 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
5840 !gsi_end_p (gsi); gsi_next (&gsi))
5842 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
5843 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
5844 continue;
5846 /* Scan over all generic vect_recog_xxx_pattern functions. */
5847 for (j = 0; j < NUM_PATTERNS; j++)
5848 vect_pattern_recog_1 (vinfo,
5849 &vect_vect_recog_func_ptrs[j], stmt_info);
5853 /* After this no more add_stmt calls are allowed. */
5854 vinfo->stmt_vec_info_ro = true;
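/* Illustration only, not part of GCC: a hypothetical scalar loop (the
   name "widen_mult_example" is made up) with the shape sketched in the
   comment above vect_pattern_recog.  The widening casts and the
   multiplication correspond to S2/S4; the recognized pattern stmt
   performs the widening multiplication directly.  */

void
widen_mult_example (int *restrict out, const short *restrict a,
                    const short *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (int) a[i] * (int) b[i];
}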